def run_dc(epoch):
    end = time.time()

    # remove head
    model.top_layer = None
    model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])

    # get the features for the whole dataset
    features = compute_features(train_loader, model, 50000)

    # cluster the features
    clustering_loss = deepcluster.cluster(features)

    # assign pseudo-labels
    dataset_train = datasets.CIFAR10('../data', train=True,
                                     transform=transforms.Compose([
                                         transforms.Pad(4),
                                         transforms.RandomCrop(32),
                                         transforms.RandomHorizontalFlip(),
                                         transforms.ToTensor(),
                                     ]))
    train_dataset = clustering.cluster_assign(deepcluster.images_lists, dataset_train)

    # uniformly sample per target
    sampler = UnifLabelSampler(int(len(train_dataset)), deepcluster.images_lists)
    train_dataloader_new = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        num_workers=4,
        sampler=sampler,
        pin_memory=True,
    )

    # set last fully connected layer
    mlp = list(model.classifier.children())
    mlp.append(nn.ReLU().cuda())
    model.classifier = nn.Sequential(*mlp)
    # model.classifier = None
    model.top_layer = nn.Linear(fd, len(deepcluster.images_lists))
    model.top_layer.weight.data.normal_(0, 0.01)
    model.top_layer.bias.data.zero_()
    model.top_layer.cuda()

    # train network with clusters as pseudo-labels
    print(f'Clustering epoch {epoch} cost time: {time.time() - end} s')
    loss = train(train_dataloader_new, model, criterion, optimizer, epoch)
    print(f'Train Epoch: {epoch}:\tLoss: {loss}')
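# run_dc relies on UnifLabelSampler to draw pseudo-labelled images so that every
# cluster contributes roughly the same number of samples per epoch.  The real
# sampler is defined elsewhere in this codebase; the class below is only a minimal
# sketch of the idea (the actual UnifLabelSampler may differ in details such as
# shuffling and the handling of empty clusters).
import numpy as np
import torch.utils.data


class UniformPseudoLabelSampler(torch.utils.data.Sampler):
    """Sample `total_size` indices with (approximately) equal mass per cluster."""

    def __init__(self, total_size, images_lists):
        self.total_size = total_size
        # drop empty clusters so we never divide by zero
        self.images_lists = [lst for lst in images_lists if len(lst) > 0]

    def _generate_indices(self):
        per_cluster = int(self.total_size / len(self.images_lists)) + 1
        indices = []
        for lst in self.images_lists:
            # sample with replacement when the cluster is smaller than its quota
            indices.extend(np.random.choice(lst, per_cluster,
                                            replace=(len(lst) < per_cluster)))
        np.random.shuffle(indices)
        return indices[:self.total_size]

    def __iter__(self):
        return iter(self._generate_indices())

    def __len__(self):
        return self.total_size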
def assess_acc(args, test_dataset, test_dataloader, model, num_imgs):
    # new clusterer
    deepcluster = clustering.__dict__[args.clustering](args.gt_k)
    features = compute_features(args, test_dataloader, model, num_imgs)
    assess_cluster_loss = deepcluster.cluster(features,
                                              proc_feat=args.proc_feat,
                                              verbose=args.verbose)

    # print("images_list sizes of clusterer after cluster")
    # for i in range(len(deepcluster.images_lists)):
    #     print("gt_k: %d (%d)" % (i, len(deepcluster.images_lists[i])))

    # non shuffled
    relabelled_test_dataset = clustering.cluster_assign(args,
                                                        deepcluster.images_lists,
                                                        test_dataset)
    assert num_imgs == len(test_dataset)
    assert num_imgs == len(relabelled_test_dataset)

    # non shuffled
    true_labels = np.array([test_dataset[i][1] for i in range(num_imgs)])
    predicted_labels = np.array(
        [relabelled_test_dataset[i][1] for i in range(num_imgs)])

    assert true_labels.min() == 0
    assert true_labels.max() == args.gt_k - 1
    assert predicted_labels.min() >= 0
    assert predicted_labels.max() < args.gt_k

    # hungarian matching
    num_correct = np.zeros((args.gt_k, args.gt_k))
    for i in range(num_imgs):
        num_correct[predicted_labels[i], true_labels[i]] += 1
    match = linear_assignment(num_imgs - num_correct)

    reordered_preds = np.zeros(num_imgs, dtype="int")
    for pred_i, target_i in match:
        reordered_preds[predicted_labels == pred_i] = target_i

    distribution, centroid_min_max = analyse(reordered_preds, args.gt_k,
                                             deepcluster.centroids)
    acc = compute_acc(reordered_preds, true_labels, args.gt_k)

    return acc, distribution, centroid_min_max, assess_cluster_loss
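# assess_acc matches cluster ids to ground-truth classes with the Hungarian
# algorithm before scoring.  The helper below is a self-contained sketch of that
# step using scipy.optimize.linear_sum_assignment (the modern replacement for the
# deprecated sklearn linear_assignment used above); `hungarian_accuracy` and
# `num_classes` are placeholder names, not part of the original code.
import numpy as np
from scipy.optimize import linear_sum_assignment


def hungarian_accuracy(predicted_labels, true_labels, num_classes):
    """Best-match accuracy between cluster assignments and ground-truth labels."""
    # contingency table: rows = predicted cluster, cols = true class
    counts = np.zeros((num_classes, num_classes), dtype=np.int64)
    for p, t in zip(predicted_labels, true_labels):
        counts[p, t] += 1

    # maximise the number of matched samples == minimise (max - counts)
    row_ind, col_ind = linear_sum_assignment(counts.max() - counts)

    # remap every predicted cluster id to its matched ground-truth class
    mapping = dict(zip(row_ind, col_ind))
    remapped = np.array([mapping[p] for p in predicted_labels])
    return (remapped == true_labels).mean()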
def main():
    args_config = args_parser()
    config = merge_config(args_config)
    train_loader, dev_loader, test_loader, num_train_steps, label_list = load_data(config)
    config.num_train_step = num_train_steps

    # reload the model
    model, optimizer, device, n_gpu = load_model(config, num_train_steps, label_list,
                                                 args_config.pretrain)
    deepcluster = clustering.__dict__[config.clustering](config.num_clusters)

    dev_best_acc = 0
    dev_best_f1 = 0
    test_best_acc = 0
    test_best_f1 = 0

    for epoch in range(int(config.num_train_epochs)):
        print("#######" * 10)
        print("EPOCH: ", str(epoch))

        features, train_input_ids, train_input_mask, train_segment_ids, \
            train_ner_cate, train_start_pos, train_end_pos = compute_features(
                train_loader, model, len(train_loader), config, device)

        clustering_loss, cluster_var = deepcluster.cluster(
            features, config.view_number, config.cluster_layer, config.pca_dim,
            config.clus_niter, epoch, verbose=True)

        train_cluster_loader = get_cluster_dataloader(
            config, train_input_ids, train_input_mask, train_segment_ids,
            train_ner_cate, train_start_pos, train_end_pos)
        cluster_dict = clustering.cluster_assign(deepcluster.images_lists)

        model, dev_best_acc, dev_best_f1, test_best_acc, test_best_f1 = train_cluster(
            model, optimizer, train_cluster_loader, dev_loader, test_loader,
            dev_best_acc, dev_best_f1, test_best_acc, test_best_f1, config, device,
            n_gpu, label_list, cluster_dict, cluster_var, epoch)

        print("=&=" * 15)
        print("DEV: current best f1, acc")
        print(dev_best_f1, dev_best_acc)
        print("TEST: current best f1, acc")
        print(test_best_f1, test_best_acc)
        print("=&=" * 15)
def main(): global args args = parser.parse_args() # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) logs = [] # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) if args.arch == 'inceptionv1': model = models.__dict__[args.arch]( sobel=args.sobel, weight_file='/home/farbod/honours/convert/kit_pytorch.npy') else: model = models.__dict__[args.arch](sobel=args.sobel) fd = int(model.top_layer.weight.size()[1]) model.top_layer = None if args.arch == 'inceptionv1': for key in model.modules(): if isinstance(key, nn.Module): continue key = torch.nn.DataParallel(key).cuda() else: model.features = torch.nn.DataParallel(model.features) model.cuda() cudnn.benchmark = True #for param in model.parameters(): # param.requires_grad = False #for param in model.classifier.parameters(): # param.requires_grad = True # create optimizer optimizer = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=10**args.wd, ) # define loss function criterion = nn.CrossEntropyLoss().cuda() # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) #args.start_epoch = checkpoint['epoch'] # remove top_layer parameters from checkpoint for key in checkpoint['state_dict']: if 'top_layer' in key: del checkpoint['state_dict'][key] model.load_state_dict(checkpoint['state_dict']) #optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) # creating checkpoint repo exp_check = os.path.join(args.exp, 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) plot_dir = os.path.join(args.exp, 'plots') if not os.path.isdir(plot_dir): os.makedirs(plot_dir) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, 'clusters')) # preprocessing of data normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) #normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], # std=[0.5, 0.5, 0.5]) tra = [ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize ] # load the data end = time.time() dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra)) if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end)) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch, num_workers=args.workers, pin_memory=True) # get ground truth labels for nmi num_classes = 65 labels = [[] for i in range(num_classes)] for i, (_, label) in enumerate(dataset.imgs): labels[label].append(i) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): end = time.time() # remove head model.top_layer = None model.classifier = nn.Sequential( *list(model.classifier.children())[:-1]) # get the features for the whole dataset features = compute_features(dataloader, model, len(dataset)) # cluster the features clustering_loss, plot, davg = deepcluster.cluster(features, verbose=args.verbose) print davg if epoch < 20: plot.savefig(os.path.join(plot_dir, 'e{}'.format(epoch))) # assign pseudo-labels train_dataset = clustering.cluster_assign(deepcluster.images_lists, dataset.imgs) #for i, image in 
enumerate(train_dataset): # save_dir = os.path.join('./viz_emb_start', str(image[1])) # if not os.path.isdir(save_dir): # os.makedirs(save_dir) # imn = (image[0].data.cpu().numpy() * 112) + 112 # imn = np.swapaxes(imn, 0, 2) # imn = np.swapaxes(imn, 1, 0) # #print imn.astype('uint8') # #print imn.astype('uint8').shape # im = Image.fromarray(imn.astype('uint8')) # im.save(os.path.join(save_dir, '{}.jpg'.format(i))) # uniformely sample per target sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, num_workers=args.workers, sampler=sampler, pin_memory=True, ) # set last fully connected layer mlp = list(model.classifier.children()) mlp.append(nn.ReLU(inplace=True).cuda()) model.classifier = nn.Sequential(*mlp) model.top_layer = nn.Linear(fd, len(deepcluster.images_lists)) model.top_layer.weight.data.normal_(0, 0.01) model.top_layer.bias.data.zero_() model.top_layer.cuda() # train network with clusters as pseudo-labels end = time.time() loss = train(train_dataloader, model, criterion, optimizer, epoch) # print log if args.verbose: print( '###### Epoch [{0}] ###### \n' 'Time: {1:.3f} s\n' 'Clustering loss: {2:.3f} \n' 'ConvNet loss: {3:.3f}'.format(epoch, time.time() - end, clustering_loss, loss)) nmi_prev = 0 nmi_gt = 0 try: nmi_prev = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.images_lists), clustering.arrange_clustering(cluster_log.data[-1])) print('NMI against previous assignment: {0:.3f}'.format( nmi_prev)) except IndexError: pass nmi_gt = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.images_lists), clustering.arrange_clustering(labels)) print('NMI against ground-truth labels: {0:.3f}'.format(nmi_gt)) print('####################### \n') logs.append([epoch, clustering_loss, loss, nmi_prev, nmi_gt, davg]) # save running checkpoint if (epoch + 1) % 10 == 0 or epoch == 0: torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(args.exp, 'checkpoint_{}.pth.tar'.format(epoch + 1))) # save cluster assignments cluster_log.log(deepcluster.images_lists) scipy.io.savemat(os.path.join(args.exp, 'logs.mat'), {'logs': np.array(logs)})
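# The NMI bookkeeping above compares the current cluster assignment with the
# previous epoch's assignment and with ground-truth labels.  Below is a small,
# self-contained sketch of that computation; `flatten_assignment` plays the role
# of clustering.arrange_clustering (one pseudo-label per image) and is an
# assumption about its behaviour, not the original helper.
import numpy as np
from sklearn.metrics import normalized_mutual_info_score


def flatten_assignment(images_lists):
    """Convert [[image idx, ...] per cluster] into an array of labels per image."""
    num_images = sum(len(lst) for lst in images_lists)
    labels = np.zeros(num_images, dtype=np.int64)
    for cluster_id, indices in enumerate(images_lists):
        labels[indices] = cluster_id
    return labels


# e.g. NMI between two consecutive epochs' assignments:
# nmi = normalized_mutual_info_score(flatten_assignment(current_lists),
#                                    flatten_assignment(previous_lists))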
def main(args): # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) model = models.__dict__[args.arch](sobel=args.sobel) fd = int(model.top_layer.weight.size()[1]) model.top_layer = None model.features = torch.nn.DataParallel(model.features) model.cuda() cudnn.benchmark = True print('CNN builded.') # create optimizer optimizer = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=10**args.wd, ) print('Optimizer created.') # define loss function criterion = nn.CrossEntropyLoss().cuda() # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] # remove top_layer parameters from checkpoint for key in list(checkpoint['state_dict']): if 'top_layer' in key: del checkpoint['state_dict'][key] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) # creating checkpoint repo exp_check = os.path.join(args.exp, 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, 'clusters')) epochs_log = Logger(os.path.join(args.exp, 'epochs300')) # Loading and preprocessing of data: custom Rescale and ToTensor transformations for VidDataset. # VidDataset has a box_frame, which is a pandas Dataframe containing images path an their bb coordinates. # Each VidDataset sample is a dict formed by a tensor (the image) and crop_coord (bb xmin, xmax, ymin, ymax). # If a pickled dataset is passed, it will be deserialized and used, else it will be normally loaded. # It is useful when we want to preprocess a dataset. 
print('Start loading dataset...') end = time.time() if args.dataset_pkl: dataset = deserialize_obj(args.dataset_pkl) # I will never use labels in deepcluster dataset.vid_labels = None else: tra = [preprocessing.Rescale((224, 224)), preprocessing.ToTensor()] dataset = VidDataset(xml_annotations_dir=args.ann, root_dir=args.data, transform=transforms.Compose(tra)) dataset.imgs = dataset.imgs[0::args.load_step] dataset.samples = dataset.samples[0::args.load_step] print('Load dataset: {0:.2f} s'.format(time.time() - end)) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch, num_workers=args.workers, pin_memory=True) # calculate batch size sum (better clean-up data with clean_data.py) dataset_len = 0 if not args.dataset_pkl: dataloader.collate_fn = my_collate for s in dataloader: dataset_len += len(s['image']) else: dataset_len = len(dataset.imgs) print("Dataset final dimension: ", dataset_len) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): end = time.time() # remove head model.top_layer = None model.classifier = nn.Sequential( *list(model.classifier.children())[:-1]) # get the features for the whole dataset hardcoded dataset dim for step=5 features = compute_features(dataloader, model, args.load_step, dataset_len) # cluster the features if args.verbose: print('Cluster the features') clustering_loss = deepcluster.cluster(features, verbose=args.verbose) # assign pseudo-labels if args.verbose: print('Assign pseudo labels') train_dataset = clustering.cluster_assign(deepcluster.images_lists, dataset.imgs) # uniformly sample per target sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, num_workers=args.workers, sampler=sampler, pin_memory=True, ) if not args.dataset_pkl: train_dataloader.collate_fn = my_collate # set last fully connected layer mlp = list(model.classifier.children()) mlp.append(nn.ReLU(inplace=True).cuda()) model.classifier = nn.Sequential(*mlp) model.top_layer = nn.Linear(fd, len(deepcluster.images_lists)) model.top_layer.weight.data.normal_(0, 0.01) model.top_layer.bias.data.zero_() model.top_layer.cuda() # train network with clusters as pseudo-labels end = time.time() loss = train(train_dataloader, model, criterion, optimizer, epoch) # print log if args.verbose: print('###### Epoch [{0}] ###### \n' 'Time: {1:.3f} s\n' 'Clustering loss: {2:.3f} \n' 'ConvNet loss: {3:.3f}'.format(epoch, time.time() - end, clustering_loss, loss)) epoch_log = [epoch, time.time() - end, clustering_loss, loss] try: nmi = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.images_lists), clustering.arrange_clustering(cluster_log.data[-1])) print('NMI against previous assignment: {0:.3f}'.format(nmi)) epoch_log.append(nmi) except IndexError: pass print('####################### \n') # save running checkpoint torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(args.exp, 'checkpoint.pth.tar')) # save cluster assignments cluster_log.log(deepcluster.images_lists) epochs_log.log(epoch_log)
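# The VidDataset pipeline above swaps in a custom collate_fn (my_collate) when the
# dataset is not pre-pickled.  my_collate itself is defined elsewhere in the repo;
# the sketch below shows one common pattern for such a function -- dropping samples
# that failed to load before batching -- and is an assumption about its intent,
# not the original implementation.
from torch.utils.data.dataloader import default_collate


def skip_broken_collate(batch):
    """Filter out samples that the dataset returned as None, then batch normally."""
    batch = [sample for sample in batch if sample is not None]
    if len(batch) == 0:
        return []
    return default_collate(batch)


# usage: torch.utils.data.DataLoader(dataset, batch_size=..., collate_fn=skip_broken_collate)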
def main(args): # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu") print(device) criterion_pseudo = nn.CrossEntropyLoss() criterion_sup = nn.CrossEntropyLoss(ignore_index=-1, weight=torch.Tensor([10, 300, 250]).to( device=device, dtype=torch.double)) # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) ''' ########################################## ########################################## # Model definition ########################################## ##########################################''' model = models.__dict__[args.arch](bn=True, num_cluster=args.nmb_cluster, num_category=args.nmb_category) fd = int(model.cluster_layer[0].weight.size() [1]) # due to transpose, fd is input dim of W (in dim, out dim) model.cluster_layer = None model.category_layer = None model.features = torch.nn.DataParallel(model.features) model.to(device, dtype=torch.double) cudnn.benchmark = True if args.optimizer is 'Adam': print('Adam optimizer: conv') optimizer_body = torch.optim.Adam( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr_Adam, betas=(0.9, 0.999), weight_decay=10**args.wd, ) else: print('SGD optimizer: conv') optimizer_body = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr_SGD, momentum=args.momentum, weight_decay=10**args.wd, ) ''' ############### ############### category_layer ############### ############### ''' model.category_layer = nn.Sequential( nn.Linear(fd, args.nmb_category), nn.Softmax(dim=1), ) model.category_layer[0].weight.data.normal_(0, 0.01) model.category_layer[0].bias.data.zero_() model.category_layer.to(device, dtype=torch.double) if args.optimizer is 'Adam': print('Adam optimizer: conv') optimizer_category = torch.optim.Adam( filter(lambda x: x.requires_grad, model.category_layer.parameters()), lr=args.lr_Adam, betas=(0.9, 0.999), weight_decay=10**args.wd, ) else: print('SGD optimizer: conv') optimizer_category = torch.optim.SGD( filter(lambda x: x.requires_grad, model.category_layer.parameters()), lr=args.lr_SGD, momentum=args.momentum, weight_decay=10**args.wd, ) ''' ######################################## ######################################## Create echogram sampling index ######################################## ########################################''' print('Sample echograms.') dataset_cp, dataset_semi = sampling_echograms_full_for_comparisonP2( args) # For comparison (paper #2) dataloader_cp = torch.utils.data.DataLoader(dataset_cp, shuffle=False, batch_size=args.batch, num_workers=args.workers, drop_last=False, pin_memory=True) dataloader_semi = torch.utils.data.DataLoader(dataset_semi, shuffle=False, batch_size=args.batch, num_workers=args.workers, drop_last=False, pin_memory=True) dataset_te = sampling_echograms_test_for_comparisonP2() dataloader_test = torch.utils.data.DataLoader(dataset_te, shuffle=False, batch_size=args.batch, num_workers=args.workers, drop_last=False, pin_memory=True) dataset_2019, label_2019, patch_loc = sampling_echograms_2019_for_comparisonP2( ) dataloader_2019 = torch.utils.data.DataLoader( dataset_2019, batch_size=1, shuffle=False, num_workers=args.workers, worker_init_fn=np.random.seed, drop_last=False, pin_memory=True) deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster, args.pca) # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) 
checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] # remove top located layer parameters from checkpoint copy_checkpoint_state_dict = checkpoint['state_dict'].copy() for key in list(copy_checkpoint_state_dict): if 'cluster_layer' in key: del copy_checkpoint_state_dict[key] checkpoint['state_dict'] = copy_checkpoint_state_dict model.load_state_dict(checkpoint['state_dict']) optimizer_body.load_state_dict(checkpoint['optimizer_body']) optimizer_category.load_state_dict( checkpoint['optimizer_category']) category_save = os.path.join(args.exp, 'category_layer.pth.tar') if os.path.isfile(category_save): category_layer_param = torch.load(category_save) model.category_layer.load_state_dict(category_layer_param) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) # creating checkpoint repo exp_check = os.path.join(args.exp, 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) exp_test = os.path.join(args.exp, 'test') for dir_2 in ['2019', 'pred']: dir_to_make = os.path.join(exp_test, dir_2) if not os.path.isdir(dir_to_make): os.makedirs(dir_to_make) ''' ####################### ####################### MAIN TRAINING ####################### #######################''' for epoch in range(args.start_epoch, args.epochs): print( '##################### Start training at Epoch %d ################' % epoch) model.classifier = nn.Sequential( *list(model.classifier.children()) [:-1]) # remove ReLU at classifier [:-1] model.cluster_layer = None model.category_layer = None ''' ####################### ####################### PSEUDO-LABEL GENERATION ####################### ####################### ''' print('Cluster the features') features_train, input_tensors_train, labels_train = compute_features_for_comparisonP2( dataloader_cp, model, len(dataset_cp) * args.for_comparisonP2_batchsize, device=device, args=args) clustering_loss, pca_features = deepcluster.cluster( features_train, verbose=args.verbose) nan_location = np.isnan(pca_features) inf_location = np.isinf(pca_features) if (not np.allclose(nan_location, 0)) or (not np.allclose( inf_location, 0)): print('PCA: Feature NaN or Inf found. 
Nan count: ', np.sum(nan_location), ' Inf count: ', np.sum(inf_location)) print('Skip epoch ', epoch) torch.save(pca_features, 'tr_pca_NaN_%d.pth.tar' % epoch) torch.save(features_train, 'tr_feature_NaN_%d.pth.tar' % epoch) continue print('Assign pseudo labels') size_cluster = np.zeros(len(deepcluster.images_lists)) for i, _list in enumerate(deepcluster.images_lists): size_cluster[i] = len(_list) print('size in clusters: ', size_cluster) img_label_pair_train = zip_img_label(input_tensors_train, labels_train) train_dataset = clustering.cluster_assign( deepcluster.images_lists, img_label_pair_train) # Reassigned pseudolabel # uniformly sample per target sampler_train = UnifLabelSampler(int(len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.for_comparisonP2_batchsize, #args.batch shuffle=False, num_workers=args.workers, sampler=sampler_train, pin_memory=True, ) ''' #################################################################### #################################################################### TRSNSFORM MODEL FOR SELF-SUPERVISION // SEMI-SUPERVISION #################################################################### #################################################################### ''' # Recover classifier with ReLU (that is not used in clustering) mlp = list(model.classifier.children( )) # classifier that ends with linear(512 * 128). No ReLU at the end mlp.append(nn.ReLU(inplace=True).to(device)) model.classifier = nn.Sequential(*mlp) model.classifier.to(device=device, dtype=torch.double) '''SELF-SUPERVISION (PSEUDO-LABELS)''' model.category_layer = None model.cluster_layer = nn.Sequential( nn.Linear(fd, args.nmb_cluster), # nn.Linear(4096, num_cluster), nn.Softmax( dim=1 ), # should be removed and replaced by ReLU for category_layer ) model.cluster_layer[0].weight.data.normal_(0, 0.01) model.cluster_layer[0].bias.data.zero_() # model.cluster_layer = model.cluster_layer.double() model.cluster_layer.to(device=device, dtype=torch.double) ''' train network with clusters as pseudo-labels ''' with torch.autograd.set_detect_anomaly(True): pseudo_loss, semi_loss, semi_accuracy = semi_train_for_comparisonP2( train_dataloader, dataloader_semi, model, fd, criterion_pseudo, criterion_sup, optimizer_body, optimizer_category, epoch, device=device, args=args) # save checkpoint if epoch % args.checkpoints == 0: path = os.path.join( args.exp, 'checkpoints', str(epoch) + '_checkpoint.pth.tar', ) if args.verbose: print('Save checkpoint at: {0}'.format(path)) torch.save( { 'epoch': epoch, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer_body': optimizer_body.state_dict(), 'optimizer_category': optimizer_category.state_dict(), }, path) torch.save( model.category_layer.state_dict(), os.path.join(args.exp, 'checkpoints', '%d_category_layer.pth.tar' % epoch)) # save running checkpoint torch.save( { 'epoch': epoch, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer_body': optimizer_body.state_dict(), 'optimizer_category': optimizer_category.state_dict(), }, os.path.join(args.exp, 'checkpoint.pth.tar')) torch.save(model.category_layer.state_dict(), os.path.join(args.exp, 'category_layer.pth.tar')) ''' ############## ############## # TEST phase ############## ############## ''' test_loss, test_accuracy, test_pred, test_label, test_pred_softmax = test_for_comparisonP2( dataloader_test, model, criterion_sup, device, args) test_pred_large = rebuild_pred_patch(test_pred) test_softmax_large = 
rebuild_pred_patch(test_pred_softmax) test_label_large = rebuild_pred_patch(test_label) '''Save prediction of the test set''' if (epoch % args.save_epoch == 0): with open( os.path.join( args.exp, 'test', 'pred', 'pred_softmax_label_epoch_%d_te.pickle' % epoch), "wb") as f: pickle.dump( [test_pred_large, test_softmax_large, test_label_large], f) fpr, \ tpr, \ roc_auc, \ roc_auc_macro, \ prob_mat, \ mat, \ f1_score, \ kappa, \ bg_accu, \ se_accu, \ ot_accu = test_analysis(test_pred_large, test_softmax_large, epoch, args) if os.path.isfile( os.path.join(args.exp, 'records_te_epoch_patch.pth.tar')): records_te_epoch = torch.load( os.path.join(args.exp, 'records_te_epoch_patch.pth.tar')) else: records_te_epoch = { 'epoch': [], 'fpr': [], 'tpr': [], 'roc_auc': [], 'roc_auc_macro': [], 'prob_mat': [], 'mat': [], 'f1_score': [], 'kappa': [], 'BG_accu_epoch': [], 'SE_accu_epoch': [], 'OT_accu_epoch': [], } records_te_epoch['epoch'].append(epoch) records_te_epoch['fpr'].append(fpr) records_te_epoch['tpr'].append(tpr) records_te_epoch['roc_auc'].append(roc_auc) records_te_epoch['roc_auc_macro'].append(roc_auc_macro) records_te_epoch['prob_mat'].append(prob_mat) records_te_epoch['mat'].append(mat) records_te_epoch['f1_score'].append(f1_score) records_te_epoch['kappa'].append(kappa) records_te_epoch['BG_accu_epoch'].append(bg_accu) records_te_epoch['SE_accu_epoch'].append(se_accu) records_te_epoch['OT_accu_epoch'].append(ot_accu) torch.save(records_te_epoch, os.path.join(args.exp, 'records_te_epoch_patch.pth.tar')) ''' ############## ############## # 2019 phase ############## ############## ''' test_loss_2019, test_accuracy_2019, test_pred_2019, test_label_2019, test_pred_softmax_2019 = test_for_comparisonP2( dataloader_2019, model, criterion_sup, device, args) test_pred_large_2019 = rebuild_pred_patch(test_pred_2019) test_softmax_large_2019 = rebuild_pred_patch(test_pred_softmax_2019) test_label_large_2019 = rebuild_pred_patch(test_label_2019) test_and_plot_2019(test_pred_large_2019, test_label_large_2019, epoch, args)
def main(args): # fix random seeds seed(31) # CNN model = MobileNetV1(num_classes=100, sobel=True) fd = int(model.top_layer.weight.size()[1]) model.top_layer = None model.features = torch.nn.DataParallel(model.features) model.cuda() cudnn.benchmark = True # create optimizer optimizer = torch.optim.SGD( [x for x in model.parameters() if x.requires_grad], lr=args.lr, momentum=args.momentum, weight_decay=10**args.wd, ) # define loss function criterion = nn.CrossEntropyLoss().cuda() # creating cluster assignments log cluster_log = Logger(os.path.join('./image_list_log/', 'clusters')) end = time.time() # load the data dataset = datasets.ImageFolder(root=r'./dataset/train', transform=transform()) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch, num_workers=args.workers, pin_memory=True) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) print('start train') # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): print(epoch) # remove head model.top_layer = None model.classifier = nn.Sequential( *list(model.classifier.children())[:-1]) # get the features for the whole dataset features = compute_features(dataloader, model, len(dataset), args.batch) # cluster the feature clustering_loss = deepcluster.cluster(features) # assign pseudo-labels train_dataset = clustering.cluster_assign(deepcluster.images_lists, dataset.imgs) # uniformly sample per target sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, num_workers=args.workers, sampler=sampler, pin_memory=True, ) # set last fully connected layer mlp = list(model.classifier.children()) mlp.append(nn.ReLU(inplace=True).cuda()) model.classifier = nn.Sequential(*mlp) model.top_layer = nn.Linear(fd, len(deepcluster.images_lists)) model.top_layer.weight.data.normal_(0, 0.01) model.top_layer.bias.data.zero_() model.top_layer.cuda() # train network with clusters as pseudo-labels end = time.time() loss = train(train_dataloader, model, criterion, optimizer, epoch, args.lr, args.wd) # print log # print('###### Epoch [{0}] ###### \n' # 'Time: {1:.3f} s\n' # 'Clustering loss: {2:.3f} \n' # 'ConvNet loss: {3:.3f}' # .format(epoch, time.time() - end, clustering_loss, loss)) try: nmi = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.images_lists), clustering.arrange_clustering(cluster_log.data[-1])) print('NMI against previous assignment: {0:.3f}'.format(nmi)) f = open('result.txt', "a") f.write('NMI against previous assignment: {0:.3f}'.format(nmi)) f.close() # print(loss) except IndexError: pass print('####################### \n') # save cluster assignments cluster_log.log(deepcluster.images_lists)
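# Every training loop in this section repeats the same "remove head / re-attach a
# fresh head" dance: the top layer and the classifier's final ReLU are stripped
# before feature extraction, then a ReLU and a new Linear sized to the number of
# clusters are put back before training on pseudo-labels.  The toy model below only
# illustrates that mechanism (names and sizes are made up); it is not one of the
# architectures used above.
import torch
import torch.nn as nn


class TinyDeepClusterNet(nn.Module):
    def __init__(self, feat_dim=128, hidden_dim=64, num_clusters=10):
        super().__init__()
        self.features = nn.Sequential(nn.Linear(feat_dim, hidden_dim), nn.ReLU())
        self.classifier = nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU())
        self.top_layer = nn.Linear(hidden_dim, num_clusters)

    def forward(self, x):
        x = self.classifier(self.features(x))
        return self.top_layer(x) if self.top_layer is not None else x


model = TinyDeepClusterNet()
fd = int(model.top_layer.weight.size()[1])       # input dim of the head

# 1) strip the head before computing features for clustering
model.top_layer = None
model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])
features = model(torch.randn(4, 128))            # penultimate-layer features

# 2) re-attach ReLU and a fresh head sized to the number of clusters found
num_clusters = 10
mlp = list(model.classifier.children())
mlp.append(nn.ReLU(inplace=True))
model.classifier = nn.Sequential(*mlp)
model.top_layer = nn.Linear(fd, num_clusters)
model.top_layer.weight.data.normal_(0, 0.01)
model.top_layer.bias.data.zero_()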
def main():
    global args
    args = parser.parse_args()

    # fix random seeds
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)

    # CNN
    if args.verbose:
        print('Architecture: {}'.format(args.arch))
    model = models.__dict__[args.arch](sobel=args.sobel)
    fd = int(model.top_layer.weight.size()[1])
    model.top_layer = None
    model.features = torch.nn.DataParallel(model.features)
    model.cuda()
    cudnn.benchmark = True

    # create optimizer
    optimizer = torch.optim.SGD(
        filter(lambda x: x.requires_grad, model.parameters()),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=10**args.wd,
    )

    # define loss function
    criterion = nn.CrossEntropyLoss().cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # remove top_layer parameters from checkpoint
            # (iterate over a snapshot of the keys: deleting entries while
            # iterating over the dict itself raises a RuntimeError on Python 3)
            for key in list(checkpoint['state_dict']):
                if 'top_layer' in key:
                    del checkpoint['state_dict'][key]
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # creating checkpoint repo
    exp_check = os.path.join(args.exp, 'checkpoints')
    if not os.path.isdir(exp_check):
        os.makedirs(exp_check)

    # creating cluster assignments log
    cluster_log = Logger(os.path.join(args.exp, 'clusters'))

    # preprocessing of data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    tra = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize
    ]

    # load the data
    end = time.time()
    dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra))
    if args.verbose:
        print('Load dataset: {0:.2f} s'.format(time.time() - end))
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # clustering algorithm to use
    deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster)

    # training convnet with DeepCluster
    for epoch in range(args.start_epoch, args.epochs):
        end = time.time()

        # remove head
        model.top_layer = None
        model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])

        # get the features for the whole dataset
        features = compute_features(dataloader, model, len(dataset))

        # cluster the features
        clustering_loss = deepcluster.cluster(features, verbose=args.verbose)

        # assign pseudo-labels
        train_dataset = clustering.cluster_assign(deepcluster.images_lists,
                                                  dataset.imgs)

        # uniformly sample per target
        sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)),
                                   deepcluster.images_lists)
        train_dataloader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch,
            num_workers=args.workers,
            sampler=sampler,
            pin_memory=True,
        )

        # set last fully connected layer
        mlp = list(model.classifier.children())
        mlp.append(nn.ReLU(inplace=True).cuda())
        model.classifier = nn.Sequential(*mlp)
        model.top_layer = nn.Linear(fd, len(deepcluster.images_lists))
        model.top_layer.weight.data.normal_(0, 0.01)
        model.top_layer.bias.data.zero_()
        model.top_layer.cuda()

        # train network with clusters as pseudo-labels
        end = time.time()
        loss = train(train_dataloader, model, criterion, optimizer, epoch)

        # print log
        if args.verbose:
            print('###### Epoch [{0}] ###### \n'
                  'Time: {1:.3f} s\n'
                  'Clustering loss: {2:.3f} \n'
                  'ConvNet loss: {3:.3f}'.format(epoch, time.time() - end,
                                                 clustering_loss, loss))
        try:
            nmi = normalized_mutual_info_score(
                clustering.arrange_clustering(deepcluster.images_lists),
                clustering.arrange_clustering(cluster_log.data[-1]))
            print('NMI against previous assignment: {0:.3f}'.format(nmi))
        except IndexError:
            pass
        print('####################### \n')

        # save running checkpoint
        torch.save(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            },
            os.path.join(args.exp, 'checkpoint.pth.tar'))

        # save cluster assignments
        cluster_log.log(deepcluster.images_lists)
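# compute_features() is called throughout this section but defined elsewhere.  The
# sketch below shows the usual shape of such a helper in DeepCluster-style code:
# run the headless model over the whole loader without gradients and pack the
# outputs into one float32 numpy array.  The name and exact signature are
# assumptions, not the original function.
import numpy as np
import torch


def compute_features_sketch(dataloader, model, num_samples):
    model.eval()
    features = None
    ptr = 0
    with torch.no_grad():
        for images, _ in dataloader:
            out = model(images.cuda()).cpu().numpy().astype('float32')
            if features is None:
                # allocate once we know the feature dimension
                features = np.zeros((num_samples, out.shape[1]), dtype='float32')
            features[ptr:ptr + out.shape[0]] = out
            ptr += out.shape[0]
    return features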
def main(): global args args = parser.parse_args() # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) model = models.__dict__[args.arch](sobel=args.sobel) fd = int(model.top_layer.weight.size()[1]) model.top_layer = None model.features = torch.nn.DataParallel(model.features) model.cuda() cudnn.benchmark = True # create optimizer optimizer = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=10**args.wd, ) # define loss function criterion = nn.CrossEntropyLoss().cuda() # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) # args.start_epoch = checkpoint['epoch'] # remove top_layer parameters from checkpoint keys_to_del = [] for key in checkpoint['state_dict']: if 'top_layer' in key: keys_to_del.append(key) for key in keys_to_del: del checkpoint['state_dict'][key] model.load_state_dict(checkpoint['state_dict']) # optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) # creating checkpoint repo exp_check = os.path.join(args.exp, 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, 'clusters')) # preprocessing of data normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # tra = [transforms.Resize(256), # transforms.CenterCrop(224), # transforms.ToTensor(), # normalize] # cf. 
encoder_clustering.py: already resized to 224x224 tra = [transforms.ToTensor(), normalize] # load the data end = time.time() dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra)) if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end)) print('len(dataset)...............:', len(dataset)) print('DataLoader...') dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch, num_workers=args.workers, pin_memory=True) print('...DataLoader') # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) end = time.time() # remove head model.top_layer = None model.classifier = nn.Sequential(*list(model.classifier.children())[:-1]) ###################################################################### # get the features for the whole dataset print('compute_features...') features = compute_features(dataloader, model, len(dataset)) print('features.shape.:', features.shape) # cluster the features print('deepcluster.cluster...') deepcluster.cluster(features, verbose=args.verbose) # assign pseudo-labels print('clustering.cluster_assign...') train_dataset = clustering.cluster_assign(deepcluster.images_lists, dataset.imgs) # print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>') # print('cluster_0: %s' % str(deepcluster.images_lists[0])) # assert len(features) == len(dataset.imgs) # for i in deepcluster.images_lists[0]: # print(i, '---', np.linalg.norm(features[i])) # print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>') print('number of clusters computed: %d' % len(deepcluster.images_lists)) print('pickle clustering objects...') handle = open(os.path.join(args.exp, "features.obj"), "wb") pickle.dump(features, handle) handle.close() handle = open(os.path.join(args.exp, "train_dataset.obj"), "wb") pickle.dump(train_dataset, handle) handle.close() handle = open(os.path.join(args.exp, "images_lists.obj"), "wb") pickle.dump(deepcluster.images_lists, handle) handle.close() handle = open(os.path.join(args.exp, "dataset_imgs.obj"), "wb") pickle.dump(dataset.imgs, handle) handle.close() print('done.')
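# deepcluster.cluster(features) hides the feature preprocessing.  In the reference
# DeepCluster implementation the features are PCA-reduced, whitened and
# L2-normalised before k-means (using faiss); the sketch below is a rough sklearn
# equivalent for illustration only, with `pca_dim` and `num_clusters` as
# placeholders rather than values taken from this script.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize


def cluster_features_sketch(features, pca_dim=256, num_clusters=1000):
    # PCA-reduce and whiten, then L2-normalise each feature vector
    x = PCA(n_components=pca_dim, whiten=True).fit_transform(features)
    x = normalize(x, norm='l2')

    kmeans = KMeans(n_clusters=num_clusters, n_init=10).fit(x)

    # group image indices by cluster id, like deepcluster.images_lists
    images_lists = [[] for _ in range(num_clusters)]
    for idx, label in enumerate(kmeans.labels_):
        images_lists[label].append(idx)
    return images_lists, kmeans.inertia_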
def main(): global args args = parser.parse_args() # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) model = models.__dict__[args.arch](sobel=args.sobel) fd = int(model.top_layer.weight.size()[1]) model.top_layer = None model.features = torch.nn.DataParallel(model.features) model.cuda() cudnn.benchmark = True # create optimizer optimizer = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=10**args.wd, ) # define loss function criterion = nn.CrossEntropyLoss().cuda() # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] # remove top_layer parameters from checkpoint for key in checkpoint['state_dict']: if 'top_layer' in key: del checkpoint['state_dict'][key] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})" .format(args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) # creating checkpoint repo exp_check = os.path.join(args.exp, 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, 'clusters')) # preprocessing of data normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) tra = [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize] # load the data end = time.time() dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra)) if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end)) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch, num_workers=args.workers, pin_memory=True) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): end = time.time() # remove head model.top_layer = None model.classifier = nn.Sequential(*list(model.classifier.children())[:-1]) # get the features for the whole dataset features = compute_features(dataloader, model, len(dataset)) # cluster the features clustering_loss = deepcluster.cluster(features, verbose=args.verbose) # assign pseudo-labels train_dataset = clustering.cluster_assign(deepcluster.images_lists, dataset.imgs) # uniformely sample per target sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, num_workers=args.workers, sampler=sampler, pin_memory=True, ) # set last fully connected layer mlp = list(model.classifier.children()) mlp.append(nn.ReLU(inplace=True).cuda()) model.classifier = nn.Sequential(*mlp) model.top_layer = nn.Linear(fd, len(deepcluster.images_lists)) model.top_layer.weight.data.normal_(0, 0.01) model.top_layer.bias.data.zero_() model.top_layer.cuda() # train network with clusters as pseudo-labels end = time.time() loss = train(train_dataloader, model, criterion, optimizer, epoch) # print log if args.verbose: print('###### Epoch [{0}] ###### \n' 'Time: {1:.3f} s\n' 'Clustering loss: {2:.3f} \n' 'ConvNet loss: {3:.3f}' .format(epoch, time.time() - end, clustering_loss, loss)) 
        try:
            nmi = normalized_mutual_info_score(
                clustering.arrange_clustering(deepcluster.images_lists),
                clustering.arrange_clustering(cluster_log.data[-1])
            )
            print('NMI against previous assignment: {0:.3f}'.format(nmi))
        except IndexError:
            pass
        print('####################### \n')

        # save running checkpoint
        torch.save({'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()},
                   os.path.join(args.exp, 'checkpoint.pth.tar'))

        # save cluster assignments
        cluster_log.log(deepcluster.images_lists)
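# Several of the resume blocks above delete 'top_layer' entries while iterating
# directly over checkpoint['state_dict'], which raises "RuntimeError: dictionary
# changed size during iteration" on Python 3.  A safe variant iterates over a
# snapshot of the keys, as sketched here (helper name is illustrative only):
import torch


def strip_top_layer(checkpoint):
    """Remove top_layer parameters from a loaded checkpoint's state_dict."""
    for key in list(checkpoint['state_dict']):   # list() snapshots the keys first
        if 'top_layer' in key:
            del checkpoint['state_dict'][key]
    return checkpoint


# usage:
# checkpoint = torch.load(args.resume)
# model.load_state_dict(strip_top_layer(checkpoint)['state_dict'])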
def main(args): # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) # CNN # if args.verbose: # print('Architecture: {}'.format(args.arch)) # model = load_net('149') # model = mobilenet_v3_large(pretrained=False, sobel=True, num_classes=100) model = MobileNetV3_Small(num_classes=100, sobel=True) # model = alexnet(sobel=True) # fd = 1000 # print(fd) fd = int(model.top_layer.weight.size()[1]) model.top_layer = None model.features = torch.nn.DataParallel(model.features) model.cuda() cudnn.benchmark = True # create optimizer optimizer = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=10**args.wd, ) # define loss function criterion = nn.CrossEntropyLoss().cuda() # creating cluster assignments log cluster_log = Logger(os.path.join('./image_list_log/', 'clusters')) normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.24703223, 0.24348512, 0.26158784]) # preprocessing of data tra = [ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize ] # load the data end = time.time() dataset = datasets.ImageFolder(r'./dataset/train', transform=transforms.Compose(tra)) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch, num_workers=args.workers, pin_memory=True) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): end = time.time() # remove head model.top_layer = None model.classifier = nn.Sequential( *list(model.classifier.children())[:-1]) # print(model.classifier) # get the features for the whole dataset features = compute_features(dataloader, model, len(dataset)) # cluster the features # if args.verbose: # print('Cluster the features') clustering_loss = deepcluster.cluster(features) # assign pseudo-labels # if args.verbose: # print('Assign pseudo labels') train_dataset = clustering.cluster_assign(deepcluster.images_lists, dataset.imgs) # uniformly sample per target sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, num_workers=args.workers, sampler=sampler, pin_memory=True, ) # set last fully connected layer # mlp = list() # mlp.append(nn.Linear(in_features=1024, out_features=1000, bias=True).cuda()) mlp = list(model.classifier.children()) mlp.append(nn.ReLU(inplace=True).cuda()) # print(mlp) model.classifier = nn.Sequential(*mlp) model.top_layer = nn.Linear(fd, len(deepcluster.images_lists)) # print(len(deepcluster.images_lists)) model.top_layer.weight.data.normal_(0, 0.01) model.top_layer.bias.data.zero_() model.top_layer.cuda() # train network with clusters as pseudo-labels end = time.time() loss = train(train_dataloader, model, criterion, optimizer, epoch) try: nmi = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.images_lists), clustering.arrange_clustering(cluster_log.data[-1])) print('NMI against previous assignment: {0:.3f}'.format(nmi)) f = open('NMI_result.txt', "a") f.write('NMI against previous assignment: {0:.3f}'.format(nmi)) f.write(" epoch: %d \n" % epoch) f.close() except IndexError: pass print('####################### \n') # save running checkpoint torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, 
            r'./exp/checkpoint_mobilenetv3_small.pth.tar')

        # save cluster assignments
        cluster_log.log(deepcluster.images_lists)
def main(): global args args = parser.parse_args() print('args: >>>>>>>>>') print(args) print('<<<<<<<<<<<<<<<') # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) # CNN # if args.verbose: # print('Architecture: {}'.format(args.arch)) # model = models.__dict__[args.arch](sobel=args.sobel) # model.top_layer = None # model.features = torch.nn.DataParallel(model.features) # model.cuda() cudnn.benchmark = True # preprocessing of data # normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # tra = [transforms.Resize(224), # transforms.CenterCrop(224), # transforms.ToTensor(), # normalize] # cf. encoder_clustering.py: already resized to 224x224 # tra = [transforms.ToTensor(), normalize] # load the data # end = time.time() # tile_name = args.tile # image_folder = os.path.join(args.data, tile_name) # print('image folder: %s' % image_folder) # dataset = datasets.ImageFolder(image_folder, transform=transforms.Compose(tra)) # print('Load dataset: {0:.2f} s'.format(time.time() - end)) # print('len(dataset)...............:', len(dataset)) # print('DataLoader...') # dataloader = torch.utils.data.DataLoader(dataset, # batch_size=args.batch, # num_workers=args.workers, # pin_memory=True) # print('...DataLoader') # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) # remove head # model.top_layer = None # model.classifier = nn.Sequential(*list(model.classifier.children())[:-1]) ###################################################################### # get the features for the whole dataset if args.features and os.path.exists(args.features): handle = open(args.features, "rb") print('load features from filesystem...: %s' % args.features) features = pickle.load(handle) print('... loaded.') handle.close() handle = open(args.features_fn, "rb") print('load features_fn from filesystem...: %s' % args.features_fn) features_fn = pickle.load(handle) print('... loaded.') handle.close() else: print('compute_features...') assert 1 == 0, "not supported at moment" #features = compute_features(dataloader, model, len(dataset)) #print('...compute_features.') print('features.shape.:', features.shape) # cluster the features print('deepcluster.cluster...') deepcluster.cluster(features, verbose=args.verbose) num_in_clusters = 0 for cluster_index, cluster in enumerate(deepcluster.images_lists): print("cluster %d: %d entries." % (cluster_index, len(cluster))) num_in_clusters += len(cluster) # assign pseudo-labels print('clustering.cluster_assign...') data = clustering.cluster_assign(deepcluster.images_lists, features_fn) print('number of clusters computed: %d' % len(deepcluster.images_lists)) file_out = os.path.join(os.path.dirname(args.features_fn), "fname_pseudolabel.obj") print('pickle list with pseudolabels to \'%s\'...' % file_out) handle = open(file_out, "wb") pickle.dump(data.imgs, handle) handle.close() assert num_in_clusters == len(data.imgs) print("in total %d tuples pickled." % len(data.imgs)) # # cf. 
also coco_knn.py # k = int(args.knn) # # check_small_clusters(deepcluster, k) # # tile_to_10nn = {} # d = features.shape[1] # dimension # # for cluster_index, cluster in enumerate(deepcluster.images_lists): # print('processing cluster %d -->' % (cluster_index + 1)) # start = time.time() # ##################################################### # # # calculate 10-NN for each feature of current cluster # cluster_feature_ids = cluster # # res = faiss.StandardGpuResources() # flat_config = faiss.GpuIndexFlatConfig() # flat_config.useFloat16 = False # flat_config.device = 0 # index = faiss.GpuIndexFlatL2(res, d, flat_config) # # num_features = len(cluster_feature_ids) # cluster_features = np.zeros((num_features, features.shape[1])).astype('float32') # for ind, id in enumerate(cluster_feature_ids): # # print(ind, '-', id) # cluster_features[ind] = features[id] # # print('cluster_features.shape = %s' % str(cluster_features.shape)) # index.add(cluster_features) # # l2_knn, knn = index.search(cluster_features, k + 1) # +1 because 1st is feature itself # assert knn.shape[0] == cluster_features.shape[0] # # for feature_id in range(num_features): # for id_nn in range(k + 1): # id_nn: id of current nearest neighbor # id_into_dataset = cluster_feature_ids[knn[feature_id][id_nn]] # img_path = dataset.imgs[id_into_dataset][0] # name = os.path.basename(img_path).replace('_' + tile_name, '') # if id_nn == 0: # feature_img_name = name # knn_list = [] # else: # l2_dist = l2_knn[feature_id][id_nn] # tuple = (name, l2_dist) # knn_list.append(tuple) # assert len(knn_list) == k # doAdd = True # if feature_img_name in tile_to_10nn: # # special case because of duplicate images in COCO dataset (e.g. 000000000927.jpg und 000000341448.jpg) # assert knn[feature_id][0] == knn[knn[feature_id][1]][0] \ # and knn[feature_id][1] == knn[knn[feature_id][1]][1], '\n%d\n%s\n%s\n%s' % (feature_id, str(knn[feature_id]), str(l2_knn[feature_id]), str(knn)) # id_into_dataset = cluster_feature_ids[knn[feature_id][1]] # img_path = dataset.imgs[id_into_dataset][0] # name_repl = os.path.basename(img_path).replace('_' + tile_name, '') # print('duplicate images detected, replacing %s with %s...' % (feature_img_name, name_repl)) # feature_img_name = name_repl # if feature_img_name in tile_to_10nn: # print( '%s already in tile_to_10nn (size: %d, featured_id: %d)' % (feature_img_name, len(tile_to_10nn), feature_id)) # doAdd = False # # if doAdd: # assert feature_img_name not in tile_to_10nn, '%s already in tile_to_10nn (size: %d, featured_id: %d)' % \ # (feature_img_name, len(tile_to_10nn), feature_id) # tile_to_10nn[feature_img_name] = knn_list # else: # print('skip feature %s altogether...' % feature_img_name) # doAdd = True # # print(('processing cluster %d <-- [{0:.2f}s]' % (cluster_index + 1)).format(time.time() - start)) # # # if len(tile_to_10nn) != len(dataset.imgs): # # assert len(tile_to_10nn) == len(dataset.imgs), '%s vs. %s' % (str(len(tile_to_10nn)), str(len(dataset.imgs))) # print('len(tile_to_10nn) != len(dataset.imgs): %s vs. %s' % (str(len(tile_to_10nn)), str(len(dataset.imgs)))) # keys = {} # for img_name in tile_to_10nn.keys(): # keys[img_name] = 1 # for img_path in dataset.imgs: # name = os.path.basename(img_path[0]).replace('_' + tile_name, '') # if name in keys: # del keys[name] # else: # print('%s not in tile_to_10nn..' 
% name) # print('state of keys after iteration:') # print(keys) # # out_dir = os.path.join(args.exp, tile_name) # file_out = os.path.join(out_dir, tile_name + "_" + args.knn + "nn.obj") # print('pickle map object to \'%s\'...' % file_out) # handle = open(file_out, "wb") # pickle.dump(tile_to_10nn, handle) # handle.close() print('done.')
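# The large commented-out block above builds per-cluster k-nearest-neighbour lists
# with the GPU version of faiss.  The snippet below is a minimal CPU-only sketch of
# the same exact L2 search, kept separate from the original code; `features` is
# assumed to be an (n, d) float32 array and the function name is illustrative.
import numpy as np
import faiss


def knn_within_cluster(features, cluster_indices, k=10):
    """Return (distances, neighbour ids) for each feature of one cluster."""
    cluster_feats = np.ascontiguousarray(
        features[cluster_indices].astype('float32'))
    index = faiss.IndexFlatL2(cluster_feats.shape[1])   # exact L2 index
    index.add(cluster_feats)
    # k + 1 because the nearest neighbour of a point is the point itself
    distances, neighbours = index.search(cluster_feats, k + 1)
    return distances, neighbours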
def main(args): # fix random seeds print('start training') torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) now = datetime.now() # load the data dataloader, dataset_train, dataloader_val, dataset_val, tsamples = load_data( args.path, args.bs, train_ratio=0.8, test_ratio=0.2) #load vgg model = Models.__dict__["vgg16"](args.sobel) # pretrained weights? fd = int(model.top_layer.weight.size()[1]) model.top_layer = None # why? do we need it here? model.features = torch.nn.DataParallel(model.features) model.cuda() cudnn.benchmark = True # create optimizer optimizer = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=10**args.wd, ) # define loss function criterion = nn.CrossEntropyLoss().cuda() # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] # remove top_layer parameters from checkpoint for key in checkpoint['state_dict']: if 'top_layer' in key: del checkpoint['state_dict'][key] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) # creating checkpoint repo exp_check = os.path.join(args.exp, 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, 'clusters')) losses = np.zeros(args.ep) # loss per epoch, array of size ep x 1 accuracies = np.zeros(args.ep) losses_val = np.zeros(args.ep) accuracies_val = np.zeros(args.ep) labels = [ 573, 671 ] # move to another location, maybe outside for-loop, outside training method # for all epochs for epoch in range(args.ep): # remove head model.top_layer = None model.classifier = nn.Sequential( *list(model.classifier.children())[:-1] ) # The actual classifier seems missing here, why are just the children added to a list? 
# get the features for the whole dataset features = compute_features(dataloader, model, len(dataset_train), args.bs, labels) features_val = compute_features(dataloader_val, model, len(dataset_val), args.bs, labels) print('PCA') pre_data = preprocessing(model, features) pre_data_val = preprocessing(model, features_val) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.k) print('clustering') deepcluster_val = clustering.__dict__[args.clustering](args.k) clustering_loss = deepcluster.cluster(pre_data, verbose=args.verbose) clustering_loss_val = deepcluster_val.cluster(pre_data_val, verbose=args.verbose) images_list = deepcluster.images_lists images_list_val = deepcluster_val.images_lists # pseudo labels print('train pseudolabels') train_dataset = clustering.cluster_assign(images_list, dataset_train) val_dataset = clustering.cluster_assign(images_list_val, dataset_val) len_d = len(train_dataset) len_val = len(val_dataset) # uniformly sample per target sampler = UnifLabelSampler(int(args.reassign * len_d), images_list) sampler2 = UnifLabelSampler(int(args.reassign * len_val), images_list_val) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.bs, sampler=sampler, pin_memory=True, ) val_dataloader = torch.utils.data.DataLoader( val_dataset, batch_size=args.bs, sampler=sampler2, pin_memory=True, ) # set last fully connected layer mlp = list(model.classifier.children()) mlp.append(nn.ReLU(inplace=True).cuda()) model.classifier = nn.Sequential(*mlp) model.top_layer = nn.Linear(fd, len(images_list)) model.top_layer.weight.data.normal_(0, 0.01) model.top_layer.bias.data.zero_() model.top_layer.cuda() # train network with clusters as pseudo-labels # train network with clusters as pseudo-labels end = time.time() losses[epoch], accuracies[epoch] = train(train_dataloader, model, criterion, optimizer, epoch, args.lr, args.wd) print(f'epoch {epoch} ended with loss {losses[epoch]}') losses_val[epoch], accuracies_val[epoch] = validate( val_dataloader, model, criterion) plot_loss_acc(losses[0:epoch], losses[0:epoch], accuracies[0:epoch], accuracies[0:epoch], now, epoch, args.k, tsamples, args.ep) # print log if args.verbose: print('###### Epoch [{0}] ###### \n' 'Time: {1:.3f} s\n' 'Clustering loss: {2:.3f} \n' 'ConvNet loss: {3:.3f}'.format(epoch, time.time() - end, losses[epoch])) # save running checkpoint torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(args.exp, f'checkpoint_{now}_k{args.k}_ep{epoch}.pth.tar')) # save cluster assignments cluster_log.log(images_list)
def main(args): # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu") print(device) # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) model = models.__dict__[args.arch](sobel=False, bn=True, out=args.nmb_cluster) fd = int(model.top_layer[0].weight.size() [1]) # due to transpose, fd is input dim of W (in dim, out dim) model.top_layer = None model.features = torch.nn.DataParallel(model.features) model = model.double() model.to(device) cudnn.benchmark = True if args.optimizer is 'Adam': print('Adam optimizer: conv') optimizer = torch.optim.Adam( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr_Adam, betas=(0.5, 0.99), weight_decay=10**args.wd, ) else: print('SGD optimizer: conv') optimizer = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr_SGD, momentum=args.momentum, weight_decay=10**args.wd, ) criterion = nn.CrossEntropyLoss() # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] # remove top_layer parameters from checkpoint copy_checkpoint_state_dict = checkpoint['state_dict'].copy() for key in list(copy_checkpoint_state_dict): if 'top_layer' in key: del copy_checkpoint_state_dict[key] checkpoint['state_dict'] = copy_checkpoint_state_dict model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) # creating checkpoint repo exp_check = os.path.join(args.exp, '../../..', 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, '../../..', 'clusters.pickle')) # # Create echogram sampling index print('Sample echograms.') end = time.time() dataset_cp = sampling_echograms_full(args) dataloader_cp = torch.utils.data.DataLoader(dataset_cp, shuffle=False, batch_size=args.batch, num_workers=args.workers, drop_last=False, pin_memory=True) if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end)) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster, args.pca) # deepcluster = clustering.Kmeans(no.cluster, dim.pca) loss_collect = [[], [], [], [], []] nmi_save = [] # for evaluation dataset_eval = sampling_echograms_eval(args) eval_dataloader = torch.utils.data.DataLoader( dataset_eval, batch_size=args.batch, shuffle=False, num_workers=args.workers, pin_memory=True, ) # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): # remove head model.top_layer = None model.classifier = nn.Sequential(*list(model.classifier.children())) # get the features for the whole dataset features_train, input_tensors_train, labels_train = compute_features( dataloader_cp, model, len(dataset_cp), device=device, args=args) # cluster the features print('Cluster the features') end = time.time() clustering_loss, pca_features = deepcluster.cluster( features_train, verbose=args.verbose) # deepcluster.cluster(features_train, verbose=args.verbose) print('Cluster time: {0:.2f} s'.format(time.time() - end)) nan_location = np.isnan(pca_features) inf_location = np.isinf(pca_features) if (not 
np.allclose(nan_location, 0)) or (not np.allclose( inf_location, 0)): print('PCA: Feature NaN or Inf found. Nan count: ', np.sum(nan_location), ' Inf count: ', np.sum(inf_location)) print('Skip epoch ', epoch) torch.save(pca_features, 'pca_NaN_%d.pth.tar' % epoch) torch.save(features_train, 'feature_NaN_%d.pth.tar' % epoch) continue # save patches per epochs cp_epoch_out = [ features_train, deepcluster.images_lists, deepcluster.images_dist_lists, input_tensors_train, labels_train ] linear_svc = SimpleClassifier(epoch, cp_epoch_out, tr_size=5, iteration=20) if args.verbose: print('###### Epoch [{0}] ###### \n' 'Classify. accu.: {1:.3f} \n' 'Pairwise classify. accu: {2} \n'.format( epoch, linear_svc.whole_score, linear_svc.pair_score)) if (epoch % args.save_epoch == 0): end = time.time() with open( os.path.join(args.exp, '../../..', 'cp_epoch_%d.pickle' % epoch), "wb") as f: pickle.dump(cp_epoch_out, f) with open( os.path.join(args.exp, '../../..', 'pca_epoch_%d.pickle' % epoch), "wb") as f: pickle.dump(pca_features, f) print('Feature save time: {0:.2f} s'.format(time.time() - end)) # assign pseudo-labels print('Assign pseudo labels') size_cluster = np.zeros(len(deepcluster.images_lists)) for i, _list in enumerate(deepcluster.images_lists): size_cluster[i] = len(_list) print('size in clusters: ', size_cluster) img_label_pair_train = zip_img_label(input_tensors_train, labels_train) train_dataset = clustering.cluster_assign( deepcluster.images_lists, img_label_pair_train) # Reassigned pseudolabel # uniformly sample per target sampler_train = UnifLabelSampler(int(len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, shuffle=False, num_workers=args.workers, sampler=sampler_train, pin_memory=True, ) # set last fully connected layer mlp = list(model.classifier.children() ) # classifier that ends with linear(512 * 128) mlp.append(nn.ReLU().to(device)) model.classifier = nn.Sequential(*mlp) model.top_layer = nn.Sequential( nn.Linear(fd, args.nmb_cluster), nn.Softmax(dim=1), ) # model.top_layer = nn.Linear(fd, args.nmb_cluster) model.top_layer[0].weight.data.normal_(0, 0.01) model.top_layer[0].bias.data.zero_() model.top_layer = model.top_layer.double() model.top_layer.to(device) # train network with clusters as pseudo-labels end = time.time() with torch.autograd.set_detect_anomaly(True): # loss, tr_epoch_out = train(train_dataloader, model, criterion, optimizer, epoch, device=device, args=args) loss = train(train_dataloader, model, criterion, optimizer, epoch, device=device, args=args) print('Train time: {0:.2f} s'.format(time.time() - end)) # if (epoch % args.save_epoch == 0): # end = time.time() # with open(os.path.join(args.exp, '..', 'tr_epoch_%d.pickle' % epoch), "wb") as f: # pickle.dump(tr_epoch_out, f) # print('Save train time: {0:.2f} s'.format(time.time() - end)) # Accuracy with training set (output vs. 
pseudo label) # accuracy_tr = np.mean(tr_epoch_out[1] == np.argmax(tr_epoch_out[2], axis=1)) # print log if args.verbose: print('###### Epoch [{0}] ###### \n' 'Time: {1:.3f} s\n' 'ConvNet tr_loss: {2:.3f} \n' 'Clustering loss: {3:.3f} \n'.format(epoch, time.time() - end, loss, clustering_loss)) try: nmi = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.images_lists), clustering.arrange_clustering(cluster_log.data[-1])) nmi_save.append(nmi) print('NMI against previous assignment: {0:.3f}'.format(nmi)) with open("./nmi_collect.pickle", "wb") as ff: pickle.dump(nmi_save, ff) except IndexError: pass print('####################### \n') # save running checkpoint torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(args.exp, '../../..', 'checkpoint.pth.tar')) # evaluation: echogram reconstruction if (epoch % args.save_epoch == 0): eval_epoch_out = evaluate(eval_dataloader, model, device=device, args=args) with open( os.path.join(args.exp, '../../..', 'eval_epoch_%d.pickle' % epoch), "wb") as f: pickle.dump(eval_epoch_out, f) print('epoch: ', type(epoch), epoch) print('loss: ', type(loss), loss) print('linear_svc.whole_score: ', type(linear_svc.whole_score), linear_svc.whole_score) print('linear_svc.pair_score: ', type(linear_svc.pair_score), linear_svc.pair_score) print('clustering_loss: ', type(clustering_loss), clustering_loss) loss_collect[0].append(epoch) loss_collect[1].append(loss) loss_collect[2].append(linear_svc.whole_score) loss_collect[3].append(linear_svc.pair_score) loss_collect[4].append(clustering_loss) with open(os.path.join(args.exp, '../../..', 'loss_collect.pickle'), "wb") as f: pickle.dump(loss_collect, f) # save cluster assignments cluster_log.log(deepcluster.images_lists)
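The NMI logged above compares the current epoch's cluster assignment with the previous one stored in cluster_log. Below is a hedged sketch of that comparison, assuming images_lists is a list of per-cluster index lists; the helper name is illustrative and stands in for clustering.arrange_clustering.

import numpy as np
from sklearn.metrics import normalized_mutual_info_score


def flatten_assignment(images_lists):
    """Convert per-cluster index lists into one cluster label per image index."""
    pairs = [(idx, cluster) for cluster, idxs in enumerate(images_lists)
             for idx in idxs]
    pairs.sort(key=lambda p: p[0])  # order by image index
    return np.array([cluster for _, cluster in pairs])


# usage: NMI between this epoch's and the previous epoch's assignments
# nmi = normalized_mutual_info_score(flatten_assignment(current_lists),
#                                    flatten_assignment(previous_lists))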
def main(): global args args = parser.parse_args() # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) model = models.__dict__[args.arch](sobel=args.sobel) fd = int(model.top_layer.weight.size()[1]) model.top_layer = None model.features = torch.nn.DataParallel(model.features) model.cuda() cudnn.benchmark = True # create optimizer optimizer = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=10**args.wd, ) # define loss function criterion = nn.CrossEntropyLoss().cuda() # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] # remove top_layer parameters from checkpoint for key in checkpoint['state_dict']: if 'top_layer' in key: del checkpoint['state_dict'][key] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) # creating checkpoint repo exp_check = os.path.join(args.exp, 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, 'clusters')) # preprocessing of data tra = [transforms.ToTensor(), transforms.Normalize((0.1307, ), (0.3081, ))] # load the data end = time.time() # MNIST-full begin:------------------------------------------- dataset = datasets.MNIST('./data', train=True, download=True, transform=transforms.Compose(tra)) true_label = dataset.train_labels.cpu().numpy() # MNIST-full end:------------------------------------------- # # FMNIST begin:------------------------------------------- # dataset = datasets.FashionMNIST('./data/fmnist', train=True, download=True, # transform=transforms.Compose(tra)) # true_label = dataset.train_labels.cpu().numpy() # # FMNIST end:------------------------------------------- # # MNIST-test begin:------------------------------------------- # dataset = datasets.MNIST('./data', train=False, download=True, # transform=transforms.Compose(tra)) # true_label = dataset.test_labels.cpu().numpy() # # MNIST-test end:------------------------------------------- # dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra)) # if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end)) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch, num_workers=args.workers, pin_memory=True) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): end = time.time() # remove head model.top_layer = None model.classifier = nn.Sequential( *list(model.classifier.children())[:-1]) # get the features for the whole dataset features = compute_features(dataloader, model, len(dataset)) # cluster the features clustering_loss = deepcluster.cluster(features, verbose=args.verbose) # assign pseudo-labels # train_dataset = clustering.cluster_assign(deepcluster.images_lists, # dataset.train_data) train_dataset = clustering.cluster_assign(deepcluster.images_lists, dataset.train_data) # uniformely sample per target sampler = UnifLabelSampler(int(args.reassign * 
len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, num_workers=args.workers, sampler=sampler, pin_memory=True, ) # set last fully connected layer mlp = list(model.classifier.children()) mlp.append(nn.ReLU(inplace=True).cuda()) model.classifier = nn.Sequential(*mlp) model.top_layer = nn.Linear(fd, len(deepcluster.images_lists)) model.top_layer.weight.data.normal_(0, 0.01) model.top_layer.bias.data.zero_() model.top_layer.cuda() # train network with clusters as pseudo-labels end = time.time() loss = train(train_dataloader, model, criterion, optimizer, epoch) # print log if args.verbose: # print('###### Epoch [{0}] ###### \n' # 'Time: {1:.3f} s\n' # 'Clustering loss: {2:.3f} \n' # 'ConvNet loss: {3:.3f}' # .format(epoch, time.time() - end, clustering_loss, loss)) try: y_pred = clustering.arrange_clustering( deepcluster.images_lists) y_last = clustering.arrange_clustering(cluster_log.data[-1]) import metrics acc = metrics.acc(y_pred, y_last) nmi = metrics.nmi(y_pred, y_last) acc_ = metrics.acc(true_label, y_pred) nmi_ = metrics.nmi(true_label, y_pred) print( 'ACC=%.4f, NMI=%.4f; Relative ACC=%.4f, Relative NMI=%.4f' % (acc_, nmi_, acc, nmi)) except IndexError: pass print('####################### \n') # save running checkpoint torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(args.exp, 'checkpoint.pth.tar')) # save cluster assignments cluster_log.log(deepcluster.images_lists)
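metrics.acc above scores the clustering after optimally matching predicted clusters to ground-truth classes. The function below is a minimal stand-in using scipy.optimize.linear_sum_assignment, assuming both labelings use ids in [0, k); it is illustrative rather than the metrics module's actual code.

import numpy as np
from scipy.optimize import linear_sum_assignment


def clustering_accuracy(y_true, y_pred, k):
    """Best accuracy over all one-to-one mappings of cluster ids to class ids."""
    cost = np.zeros((k, k), dtype=np.int64)
    for t, p in zip(y_true, y_pred):
        cost[p, t] += 1  # co-occurrence counts
    row, col = linear_sum_assignment(-cost)  # negate to maximize matches
    return cost[row, col].sum() / float(len(y_true))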
def main(args): # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu") print(device) # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) model = models.__dict__[args.arch](sobel=False, bn=True, out=args.nmb_cluster) fd = int(model.top_layer[0].weight.size() [1]) # due to transpose, fd is input dim of W (in dim, out dim) model.top_layer = None model.features = torch.nn.DataParallel(model.features) model = model.double() model.to(device) cudnn.benchmark = True # create optimizer # optimizer = torch.optim.SGD( # filter(lambda x: x.requires_grad, model.parameters()), # lr=args.lr, # momentum=args.momentum, # weight_decay=10**args.wd, # ) optimizer = torch.optim.Adam( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, betas=(0.5, 0.99), weight_decay=10**args.wd, ) criterion = nn.CrossEntropyLoss() # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] # remove top_layer parameters from checkpoint copy_checkpoint_state_dict = checkpoint['state_dict'].copy() for key in list(copy_checkpoint_state_dict): if 'top_layer' in key: del copy_checkpoint_state_dict[key] checkpoint['state_dict'] = copy_checkpoint_state_dict model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) # creating checkpoint repo exp_check = os.path.join(args.exp, '../checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, 'clusters.pickle')) # load dataset (initial echograms) window_size = [args.window_dim, args.window_dim] # # Create echogram sampling index print('Sample echograms.') end = time.time() dataset_cp = sampling_echograms_full(args) dataloader_cp = torch.utils.data.DataLoader(dataset_cp, shuffle=False, batch_size=args.batch, num_workers=args.workers, drop_last=False, pin_memory=True) if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end)) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster, args.pca) # deepcluster = clustering.Kmeans(no.cluster, dim.pca) loss_collect = [[], [], []] # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): # remove head model.top_layer = None model.classifier = nn.Sequential(*list(model.classifier.children( ))) # End with linear(512*128) in original vgg) # ReLU in .classfier() will follow later # get the features for the whole dataset features_train, input_tensors_train, labels_train = compute_features( dataloader_cp, model, len(dataset_cp), device=device, args=args) # cluster the features print('Cluster the features') end = time.time() clustering_loss = deepcluster.cluster(features_train, verbose=args.verbose) print('Cluster time: {0:.2f} s'.format(time.time() - end)) # save patches per epochs if ((epoch + 1) % args.save_epoch == 0): end = time.time() cp_epoch_out = [ features_train, deepcluster.images_lists, deepcluster.images_dist_lists, input_tensors_train, labels_train ] with open("./cp_epoch_%d.pickle" % epoch, "wb") as f: pickle.dump(cp_epoch_out, f) print('Feature save time: {0:.2f} 
s'.format(time.time() - end)) # assign pseudo-labels print('Assign pseudo labels') size_cluster = np.zeros(len(deepcluster.images_lists)) for i, _list in enumerate(deepcluster.images_lists): size_cluster[i] = len(_list) print('size in clusters: ', size_cluster) img_label_pair_train = zip_img_label(input_tensors_train, labels_train) train_dataset = clustering.cluster_assign( deepcluster.images_lists, img_label_pair_train) # Reassigned pseudolabel # ((img[imgidx], label[imgidx]), pseudolabel, imgidx) # N = len(imgidx) # uniformly sample per target sampler_train = UnifLabelSampler(int(len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, shuffle=False, num_workers=args.workers, sampler=sampler_train, pin_memory=True, ) # set last fully connected layer mlp = list(model.classifier.children() ) # classifier that ends with linear(512 * 128) mlp.append(nn.ReLU().to(device)) model.classifier = nn.Sequential(*mlp) model.top_layer = nn.Sequential( nn.Linear(fd, args.nmb_cluster), nn.Softmax(dim=1), ) # model.top_layer = nn.Linear(fd, args.nmb_cluster) model.top_layer[0].weight.data.normal_(0, 0.01) model.top_layer[0].bias.data.zero_() model.top_layer = model.top_layer.double() model.top_layer.to(device) # train network with clusters as pseudo-labels end = time.time() with torch.autograd.set_detect_anomaly(True): loss, tr_epoch_out = train(train_dataloader, model, criterion, optimizer, epoch, device=device, args=args) print('Train time: {0:.2f} s'.format(time.time() - end)) if ((epoch + 1) % args.save_epoch == 0): end = time.time() with open("./tr_epoch_%d.pickle" % epoch, "wb") as f: pickle.dump(tr_epoch_out, f) print('Save train time: {0:.2f} s'.format(time.time() - end)) # Accuracy with training set (output vs. pseudo label) accuracy_tr = np.mean( tr_epoch_out[1] == np.argmax(tr_epoch_out[2], axis=1)) # print log if args.verbose: print('###### Epoch [{0}] ###### \n' 'Time: {1:.3f} s\n' 'Clustering loss: {2:.3f} \n' 'ConvNet tr_loss: {3:.3f} \n' 'ConvNet tr_acc: {4:.3f} \n'.format(epoch, time.time() - end, clustering_loss, loss, accuracy_tr)) try: nmi = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.images_lists), clustering.arrange_clustering(cluster_log.data[-1])) print('NMI against previous assignment: {0:.3f}'.format(nmi)) except IndexError: pass print('####################### \n') # save running checkpoint torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(args.exp, 'checkpoint.pth.tar')) loss_collect[0].append(epoch) loss_collect[1].append(loss) loss_collect[2].append(accuracy_tr) with open("./loss_collect.pickle", "wb") as f: pickle.dump(loss_collect, f) # save cluster assignments cluster_log.log(deepcluster.images_lists)
def main(): global args use_cuda = torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") criterion = nn.CrossEntropyLoss() # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) # CNN if args.verbose: print('Architecture: VGGMiniCBR') model = VGGMiniCBR(num_classes=10) fd = int(model.top_layer.weight.size()[1]) model.top_layer = None model.to(device) cudnn.benchmark = True # create optimizer optimizer = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=10 ** args.wd, ) # optimizer = torch.optim.Adam(filter(lambda x: x.requires_grad, model.parameters())) # creating checkpoint repo exp_check = os.path.join(args.exp, 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) cluster_log = Logger(os.path.join(exp_path, 'clusters')) tra = [ transforms.Grayscale(num_output_channels=1), transforms.RandomAffine(degrees=5, translate=(0.03, 0.03), scale=(0.95, 1.05), shear=5), transforms.ToTensor(), transforms.Normalize((mean_std[use_zca][0],), (mean_std[use_zca][1],)) ] end = time.time() dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra)) if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end)) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch, num_workers=args.workers, pin_memory=True) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): end = time.time() # remove head model.top_layer = None model.classifier = nn.Sequential(*list(model.classifier.children())[:-1]) # ignoring ReLU layer in classifier # get the features for the whole dataset features = compute_features(dataloader, model, len(dataset), device) # ndarray, (60k, 512) [-0.019, 0.016] # cluster the features clustering_loss = deepcluster.cluster(features, verbose=args.verbose) # assign pseudo-labels train_dataset = clustering.cluster_assign(deepcluster.images_lists, dataset.imgs) # uniformely sample per target sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, num_workers=args.workers, sampler=sampler, pin_memory=True, ) # set last fully connected layer mlp = list(model.classifier.children()) mlp.append(nn.ReLU(inplace=True).to(device)) model.classifier = nn.Sequential(*mlp) model.top_layer = nn.Linear(fd, len(deepcluster.images_lists)) model.top_layer.weight.data.normal_(0, 0.01) model.top_layer.bias.data.zero_() model.top_layer.to(device) # train network with clusters as pseudo-labels end = time.time() # loss = train(train_dataloader, model, criterion, optimizer, epoch) loss = train(model, device, train_dataloader, optimizer, epoch, criterion) # print log if args.verbose: print('###### Epoch [{0}] ###### \n' 'Time: {1:.3f} s\n' 'Clustering loss: {2:.3f} \n' 'ConvNet loss: {3:.3f}' .format(epoch, time.time() - end, clustering_loss, loss)) try: nmi = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.images_lists), clustering.arrange_clustering(cluster_log.data[-1]) ) writer.add_scalar('nmi/train', nmi, epoch) print('NMI against previous assignment: {0:.3f}'.format(nmi)) except IndexError: pass print('####################### \n') # save running checkpoint torch.save({'epoch': epoch + 1, 'arch': "VGGMiniCBR", 'state_dict': 
                   model.state_dict(),
                    'optimizer': optimizer.state_dict()},
                   os.path.join(exp_path, 'checkpoint.pth.tar'))

        # save cluster assignments
        cluster_log.log(deepcluster.images_lists)

    torch.save(model.state_dict(), os.path.join(args.exp, "mnist_cnn.pt"))
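All variants rely on a compute_features helper that runs the headless model over the full dataset and returns a single (N, feature_dim) array, as hinted by the "(60k, 512)" comment above. The sketch below is a minimal stand-in, assuming the clustering dataloader is not shuffled and yields (input, label) batches; the name and signature are illustrative.

import numpy as np
import torch


@torch.no_grad()
def extract_features(dataloader, model, N, device):
    """Stack model outputs for the whole dataset into one (N, dim) float32 array,
    preserving dataset order (requires shuffle=False on the dataloader)."""
    model.eval()
    features = None
    batch = dataloader.batch_size
    for i, (inputs, _) in enumerate(dataloader):
        out = model(inputs.to(device)).cpu().numpy().astype('float32')
        if features is None:
            features = np.zeros((N, out.shape[1]), dtype='float32')
        features[i * batch:i * batch + out.shape[0]] = out
    return features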
def main(args): # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu") print(device) criterion = nn.CrossEntropyLoss() cluster_log = Logger(os.path.join(args.exp, '../..', 'clusters.pickle')) # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) ''' ########################################## ########################################## # Model definition ########################################## ##########################################''' model = models.__dict__[args.arch](bn=True, num_cluster=args.nmb_cluster, num_category=args.nmb_category) fd = int(model.cluster_layer[0].weight.size() [1]) # due to transpose, fd is input dim of W (in dim, out dim) model.cluster_layer = None model.category_layer = None model.features = torch.nn.DataParallel(model.features) model = model.double() model.to(device) cudnn.benchmark = True if args.optimizer is 'Adam': print('Adam optimizer: conv') optimizer_body = torch.optim.Adam( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr_Adam, betas=(0.9, 0.999), weight_decay=10**args.wd, ) else: print('SGD optimizer: conv') optimizer_body = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr_SGD, momentum=args.momentum, weight_decay=10**args.wd, ) ''' ############### ############### category_layer ############### ############### ''' model.category_layer = nn.Sequential( nn.Linear(fd, args.nmb_category), nn.Softmax(dim=1), ) model.category_layer[0].weight.data.normal_(0, 0.01) model.category_layer[0].bias.data.zero_() model.category_layer = model.category_layer.double() model.category_layer.to(device) if args.optimizer is 'Adam': print('Adam optimizer: conv') optimizer_category = torch.optim.Adam( filter(lambda x: x.requires_grad, model.category_layer.parameters()), lr=args.lr_Adam, betas=(0.9, 0.999), weight_decay=10**args.wd, ) else: print('SGD optimizer: conv') optimizer_category = torch.optim.SGD( filter(lambda x: x.requires_grad, model.category_layer.parameters()), lr=args.lr_SGD, momentum=args.momentum, weight_decay=10**args.wd, ) ''' ######################################## ######################################## Create echogram sampling index ######################################## ########################################''' print('Sample echograms.') dataset_cp, dataset_semi = sampling_echograms_full(args) dataloader_cp = torch.utils.data.DataLoader(dataset_cp, shuffle=False, batch_size=args.batch, num_workers=args.workers, drop_last=False, pin_memory=True) dataloader_semi = torch.utils.data.DataLoader(dataset_semi, shuffle=False, batch_size=args.batch, num_workers=args.workers, drop_last=False, pin_memory=True) dataset_test = sampling_echograms_test(args) dataloader_test = torch.utils.data.DataLoader(dataset_test, shuffle=False, batch_size=args.batch, num_workers=args.workers, drop_last=False, pin_memory=True) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster, args.pca) # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] # remove top located layer parameters from checkpoint copy_checkpoint_state_dict = checkpoint['state_dict'].copy() for key in list(copy_checkpoint_state_dict): if 'cluster_layer' in key: del copy_checkpoint_state_dict[key] # if 
'category_layer' in key: # del copy_checkpoint_state_dict[key] checkpoint['state_dict'] = copy_checkpoint_state_dict model.load_state_dict(checkpoint['state_dict']) optimizer_body.load_state_dict(checkpoint['optimizer_body']) optimizer_category.load_state_dict( checkpoint['optimizer_category']) category_save = os.path.join(args.exp, '../..', 'category_layer.pth.tar') if os.path.isfile(category_save): category_layer_param = torch.load(category_save) model.category_layer.load_state_dict(category_layer_param) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) # creating checkpoint repo exp_check = os.path.join(args.exp, '../..', 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) ''' ####################### ####################### PRETRAIN: commented ####################### #######################''' # if args.start_epoch < args.pretrain_epoch: # if os.path.isfile(os.path.join(args.exp, '..', 'pretrain_loss_collect.pickle')): # with open(os.path.join(args.exp, '..', 'pretrain_loss_collect.pickle'), "rb") as f: # pretrain_loss_collect = pickle.load(f) # else: # pretrain_loss_collect = [[], [], [], [], []] # print('Start pretraining with %d percent of the dataset from epoch %d/(%d)' # % (int(args.semi_ratio * 100), args.start_epoch, args.pretrain_epoch)) # model.cluster_layer = None # # for epoch in range(args.start_epoch, args.pretrain_epoch): # with torch.autograd.set_detect_anomaly(True): # pre_loss, pre_accuracy = supervised_train(loader=dataloader_semi, # model=model, # crit=criterion, # opt_body=optimizer_body, # opt_category=optimizer_category, # epoch=epoch, device=device, args=args) # test_loss, test_accuracy = test(dataloader_test, model, criterion, device, args) # # # print log # if args.verbose: # print('###### Epoch [{0}] ###### \n' # 'PRETRAIN tr_loss: {1:.3f} \n' # 'TEST loss: {2:.3f} \n' # 'PRETRAIN tr_accu: {3:.3f} \n' # 'TEST accu: {4:.3f} \n'.format(epoch, pre_loss, test_loss, pre_accuracy, test_accuracy)) # pretrain_loss_collect[0].append(epoch) # pretrain_loss_collect[1].append(pre_loss) # pretrain_loss_collect[2].append(test_loss) # pretrain_loss_collect[3].append(pre_accuracy) # pretrain_loss_collect[4].append(test_accuracy) # # torch.save({'epoch': epoch + 1, # 'arch': args.arch, # 'state_dict': model.state_dict(), # 'optimizer_body': optimizer_body.state_dict(), # 'optimizer_category': optimizer_category.state_dict(), # }, # os.path.join(args.exp, '..', 'checkpoint.pth.tar')) # torch.save(model.category_layer.state_dict(), os.path.join(args.exp, '..', 'category_layer.pth.tar')) # # with open(os.path.join(args.exp, '..', 'pretrain_loss_collect.pickle'), "wb") as f: # pickle.dump(pretrain_loss_collect, f) # # if (epoch+1) % args.checkpoints == 0: # path = os.path.join( # args.exp, '..', # 'checkpoints', # 'checkpoint_' + str(epoch) + '.pth.tar', # ) # if args.verbose: # print('Save checkpoint at: {0}'.format(path)) # torch.save({'epoch': epoch + 1, # 'arch': args.arch, # 'state_dict': model.state_dict(), # 'optimizer_body': optimizer_body.state_dict(), # 'optimizer_category': optimizer_category.state_dict(), # }, path) if os.path.isfile(os.path.join(args.exp, '../..', 'loss_collect.pickle')): with open(os.path.join(args.exp, '../..', 'loss_collect.pickle'), "rb") as f: loss_collect = pickle.load(f) else: loss_collect = [[], [], [], [], [], [], []] if os.path.isfile(os.path.join(args.exp, '../..', 'nmi_collect.pickle')): with open(os.path.join(args.exp, 
'../..', 'nmi_collect.pickle'), "rb") as ff: nmi_save = pickle.load(ff) else: nmi_save = [] ''' ####################### ####################### MAIN TRAINING ####################### #######################''' for epoch in range(args.start_epoch, args.epochs): end = time.time() model.classifier = nn.Sequential( *list(model.classifier.children()) [:-1]) # remove ReLU at classifier [:-1] model.cluster_layer = None model.category_layer = None ''' ####################### ####################### PSEUDO-LABEL GENERATION ####################### ####################### ''' print('Cluster the features') features_train, input_tensors_train, labels_train = compute_features( dataloader_cp, model, len(dataset_cp), device=device, args=args) clustering_loss, pca_features = deepcluster.cluster( features_train, verbose=args.verbose) nan_location = np.isnan(pca_features) inf_location = np.isinf(pca_features) if (not np.allclose(nan_location, 0)) or (not np.allclose( inf_location, 0)): print('PCA: Feature NaN or Inf found. Nan count: ', np.sum(nan_location), ' Inf count: ', np.sum(inf_location)) print('Skip epoch ', epoch) torch.save(pca_features, 'tr_pca_NaN_%d.pth.tar' % epoch) torch.save(features_train, 'tr_feature_NaN_%d.pth.tar' % epoch) continue print('Assign pseudo labels') size_cluster = np.zeros(len(deepcluster.images_lists)) for i, _list in enumerate(deepcluster.images_lists): size_cluster[i] = len(_list) print('size in clusters: ', size_cluster) img_label_pair_train = zip_img_label(input_tensors_train, labels_train) train_dataset = clustering.cluster_assign( deepcluster.images_lists, img_label_pair_train) # Reassigned pseudolabel # uniformly sample per target sampler_train = UnifLabelSampler(int(len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, shuffle=False, num_workers=args.workers, sampler=sampler_train, pin_memory=True, ) ''' #################################################################### #################################################################### TRSNSFORM MODEL FOR SELF-SUPERVISION // SEMI-SUPERVISION #################################################################### #################################################################### ''' # Recover classifier with ReLU (that is not used in clustering) mlp = list(model.classifier.children( )) # classifier that ends with linear(512 * 128). 
No ReLU at the end mlp.append(nn.ReLU(inplace=True).to(device)) model.classifier = nn.Sequential(*mlp) model.classifier.to(device) '''SELF-SUPERVISION (PSEUDO-LABELS)''' model.category_layer = None model.cluster_layer = nn.Sequential( nn.Linear(fd, args.nmb_cluster), # nn.Linear(4096, num_cluster), nn.Softmax( dim=1 ), # should be removed and replaced by ReLU for category_layer ) model.cluster_layer[0].weight.data.normal_(0, 0.01) model.cluster_layer[0].bias.data.zero_() model.cluster_layer = model.cluster_layer.double() model.cluster_layer.to(device) ''' train network with clusters as pseudo-labels ''' with torch.autograd.set_detect_anomaly(True): pseudo_loss, semi_loss, semi_accuracy = semi_train( train_dataloader, dataloader_semi, model, fd, criterion, optimizer_body, optimizer_category, epoch, device=device, args=args) # save checkpoint if (epoch + 1) % args.checkpoints == 0: path = os.path.join( args.exp, '../..', 'checkpoints', 'checkpoint_' + str(epoch) + '.pth.tar', ) if args.verbose: print('Save checkpoint at: {0}'.format(path)) torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer_body': optimizer_body.state_dict(), 'optimizer_category': optimizer_category.state_dict(), }, path) ''' ############## ############## # TEST phase ############## ############## ''' test_loss, test_accuracy, test_pred, test_label = test( dataloader_test, model, criterion, device, args) '''Save prediction of the test set''' if (epoch % args.save_epoch == 0): with open( os.path.join(args.exp, '../..', 'sup_epoch_%d_te.pickle' % epoch), "wb") as f: pickle.dump([test_pred, test_label], f) if args.verbose: print('###### Epoch [{0}] ###### \n' 'Time: {1:.3f} s\n' 'Pseudo tr_loss: {2:.3f} \n' 'SEMI tr_loss: {3:.3f} \n' 'TEST loss: {4:.3f} \n' 'Clustering loss: {5:.3f} \n' 'SEMI accu: {6:.3f} \n' 'TEST accu: {7:.3f} \n'.format(epoch, time.time() - end, pseudo_loss, semi_loss, test_loss, clustering_loss, semi_accuracy, test_accuracy)) try: nmi = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.images_lists), clustering.arrange_clustering(cluster_log.data[-1])) nmi_save.append(nmi) print('NMI against previous assignment: {0:.3f}'.format(nmi)) with open( os.path.join(args.exp, '../..', 'nmi_collect.pickle'), "wb") as ff: pickle.dump(nmi_save, ff) except IndexError: pass print('####################### \n') # save cluster assignments cluster_log.log(deepcluster.images_lists) # save running checkpoint torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer_body': optimizer_body.state_dict(), 'optimizer_category': optimizer_category.state_dict(), }, os.path.join(args.exp, '../..', 'checkpoint.pth.tar')) torch.save(model.category_layer.state_dict(), os.path.join(args.exp, '../..', 'category_layer.pth.tar')) loss_collect[0].append(epoch) loss_collect[1].append(pseudo_loss) loss_collect[2].append(semi_loss) loss_collect[3].append(clustering_loss) loss_collect[4].append(test_loss) loss_collect[5].append(semi_accuracy) loss_collect[6].append(test_accuracy) with open(os.path.join(args.exp, '../..', 'loss_collect.pickle'), "wb") as f: pickle.dump(loss_collect, f) ''' ############################ ############################ # PSEUDO-LABEL GEN: Test set ############################ ############################ ''' model.classifier = nn.Sequential( *list(model.classifier.children()) [:-1]) # remove ReLU at classifier [:-1] model.cluster_layer = None model.category_layer = None print('TEST set: Cluster the features') 
features_te, input_tensors_te, labels_te = compute_features( dataloader_test, model, len(dataset_test), device=device, args=args) clustering_loss_te, pca_features_te = deepcluster.cluster( features_te, verbose=args.verbose) mlp = list(model.classifier.children( )) # classifier that ends with linear(512 * 128). No ReLU at the end mlp.append(nn.ReLU(inplace=True).to(device)) model.classifier = nn.Sequential(*mlp) model.classifier.to(device) nan_location = np.isnan(pca_features_te) inf_location = np.isinf(pca_features_te) if (not np.allclose(nan_location, 0)) or (not np.allclose( inf_location, 0)): print('PCA: Feature NaN or Inf found. Nan count: ', np.sum(nan_location), ' Inf count: ', np.sum(inf_location)) print('Skip epoch ', epoch) torch.save(pca_features_te, 'te_pca_NaN_%d.pth.tar' % epoch) torch.save(features_te, 'te_feature_NaN_%d.pth.tar' % epoch) continue # save patches per epochs cp_epoch_out = [ features_te, deepcluster.images_lists, deepcluster.images_dist_lists, input_tensors_te, labels_te ] if (epoch % args.save_epoch == 0): with open( os.path.join(args.exp, '../..', 'cp_epoch_%d_te.pickle' % epoch), "wb") as f: pickle.dump(cp_epoch_out, f) with open( os.path.join(args.exp, '../..', 'pca_epoch_%d_te.pickle' % epoch), "wb") as f: pickle.dump(pca_features_te, f)
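clustering.cluster_assign, used throughout, rewraps the base data so that each item carries its cluster id as the training target (some variants also keep the original label and index). The class below is a simplified sketch of that core idea, assuming the base dataset is indexable and returns (image, original_label); it is not the repository's actual implementation.

import torch
from torch.utils.data import Dataset


class PseudoLabelDataset(Dataset):
    """Wrap a base dataset so __getitem__ returns (image, pseudo_label)."""

    def __init__(self, images_lists, base_dataset):
        self.base = base_dataset
        self.index_and_label = [(idx, cluster)
                                for cluster, idxs in enumerate(images_lists)
                                for idx in idxs]

    def __len__(self):
        return len(self.index_and_label)

    def __getitem__(self, i):
        idx, pseudo_label = self.index_and_label[i]
        image, _ = self.base[idx]  # the original label is discarded here
        return image, pseudo_label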
def main(args): # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) run = wandb.init(project='deepcluster4nlp', config=args) # load the data end = time.time() tokenizer = get_tokenizer() dataset = ImdbDataset(True, tokenizer) dataloader = get_dataloader(dataset, tokenizer, args.batch) if args.verbose: print(('Load dataset: {0:.2f} s'.format(time.time() - end))) # cluster_lists = [[i*len(dataset)//args.nmb_cluster + j for j in range(len(dataset)//args.nmb_cluster)] # for i in range(args.nmb_cluster)] # # reassigned_dataset = cluster_assign(cluster_lists, dataset) # # reassigned_dataloader = get_dataloader(reassigned_dataset, tokenizer) # CNN if args.verbose: print(('Architecture: {}'.format(args.arch))) model = textcnn(tokenizer, num_class_features=args.num_class_features) #model = models.__dict__[args.arch](tokenizer) #fd =int(model.top_layer.weight.size()[1]) # replaced by num_class_features model.reset_top_layer() #model.top_layer = None #model.features = torch.nn.DataParallel(model.features, device_ids=[0]) model.to(device) cudnn.benchmark = True # wandb.watch(model) # create optimizer optimizer = torch.optim.AdamW( [x for x in model.parameters() if x.requires_grad], lr=args.lr) #optimizer = torch.optim.SGD( # [x for x in model.parameters() if x.requires_grad], # lr=args.lr, # momentum=args.momentum, # weight_decay=10**args.wd # ) # define loss function criterion = nn.CrossEntropyLoss().to(device) # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print(("=> loading checkpoint '{}'".format(args.resume))) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] # remove top_layer parameters from checkpoint for key in copy.deepcopy(checkpoint['state_dict']): if 'top_layer' in key: del checkpoint['state_dict'][key] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print(("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch']))) else: print(("=> no checkpoint found at '{}'".format(args.resume))) # creating checkpoint repo exp_check = os.path.join(args.exp, 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, 'clusters')) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): end = time.time() # remove head model.reset_top_layer() #top_layer = None # get the features for the whole dataset features = compute_features(dataloader, model, len(dataset)) should_save = False if epoch % 50 == 0 or epoch == args.epochs - 1: should_save = True if should_save: # save the features and dataset wandb_dataset1 = wandb.Artifact(name=f'data', type='dataset') with wandb_dataset1.new_file(f'data_epoch_{epoch}.csv') as f: pd.DataFrame(np.asanyarray([d['text'] for d in dataset.data ])).to_csv(f, sep='\t') run.use_artifact(wandb_dataset1) wandb_dataset2 = wandb.Artifact(name=f'features', type='dataset') with wandb_dataset2.new_file(f'features_epoch_{epoch}.csv') as f: pd.DataFrame(features).to_csv(f, sep='\t') run.use_artifact(wandb_dataset2) pd.DataFrame( np.asanyarray([[d['text'], d['sentiment']] for d in dataset.data ])).to_csv(f'res/data_epoch_{epoch}.tsv', sep='\t', index=None, header=['text', 'sentiment']) pd.DataFrame(features).to_csv(f'res/features_epoch_{epoch}.tsv', sep='\t', index=None, 
header=False) # cluster the features if args.verbose: print('Cluster the features') clustering_loss = deepcluster.cluster(features, verbose=args.verbose) # assign pseudo-labels if args.verbose: print('Assign pseudo labels') # train_dataset = clustering.cluster_assign(deepcluster.cluster_lists, # dataset.data) train_dataset = clustering.cluster_assign(deepcluster.cluster_lists, dataset) # uniformly sample per target # sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)), # deepcluster.cluster_lists) # train_dataloader = torch.utils.data.DataLoader( # train_dataset, # batch_size=args.batch, # num_workers=args.workers, # sampler=sampler, # pin_memory=True, # ) train_dataloader = get_dataloader(train_dataset, tokenizer, args.batch) # set last fully connected layer model.set_top_layer(cluster_list_length=len(deepcluster.cluster_lists)) #model.classifier = nn.Sequential(*mlp) #model.top_layer = nn.Linear(num_class_features,len(deepcluster.cluster_lists) ) #model.top_layer.weight.data.normal_(0, 0.01) #model.top_layer.bias.data.zero_() #model.top_layer.cuda() # train network with clusters as pseudo-labels end = time.time() loss = train(train_dataloader, model, criterion, optimizer, epoch) summary_dict = { 'time': time.time() - end, 'clustering_loss': clustering_loss, 'convnet_loss': loss, 'clusters': len(deepcluster.cluster_lists) } # print log if args.verbose: print(('###### Epoch [{0}] ###### \n' 'Time: {1:.3f} s\n' 'Clustering loss: {2:.3f} \n' 'ConvNet loss: {3:.3f}'.format(epoch, time.time() - end, clustering_loss, loss))) try: nmi = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.cluster_lists), clustering.arrange_clustering(cluster_log.data[-1])) summary_dict['NMI'] = nmi print(('NMI against previous assignment: {0:.3f}'.format(nmi))) except IndexError: pass print('####################### \n') # wandb log # wandb.log(summary_dict) # save running checkpoint torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(args.exp, 'checkpoint.pth.tar')) if epoch == args.epochs - 1: model_artifact = wandb.Artifact(name=f'model', type='model') model_artifact.add_file( os.path.join(args.exp, 'checkpoint.pth.tar')) run.use_artifact(model_artifact) # save cluster assignments cluster_log.log(deepcluster.cluster_lists)
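The clustering objects used throughout (clustering.__dict__[args.clustering](...)) typically reduce, whiten, and L2-normalise the features before k-means. The sketch below shows that pipeline with scikit-learn as a stand-in for whatever k-means backend the clustering module actually uses; the function and argument names are illustrative assumptions.

import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize


def cluster_features(features, n_clusters, pca_dim=256):
    """PCA-whiten, L2-normalise, then k-means; returns labels and per-cluster index lists."""
    feats = PCA(n_components=min(pca_dim, features.shape[1]),
                whiten=True).fit_transform(features.astype(np.float64))
    feats = normalize(feats)  # L2 normalisation per sample
    labels = KMeans(n_clusters=n_clusters, n_init=10).fit_predict(feats)
    images_lists = [np.where(labels == c)[0].tolist()
                    for c in range(n_clusters)]
    return labels, images_lists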
def main(): global args args = parser.parse_args() args.out_dir = os.path.join(args.out_root, str(args.model_ind)) if not os.path.exists(args.out_dir): os.makedirs(args.out_dir) if args.resume: # get old args old_args = args reloaded_args_path = os.path.join(old_args.out_dir, "config.pickle") print("Loading restarting args from: %s" % reloaded_args_path) with open(reloaded_args_path, "rb") as args_f: args = pickle.load(args_f) assert (args.model_ind == old_args.model_ind) args.resume = True next_epoch = args.epoch + 1 # indexed from 0, also = num epochs passed print("stored losses and accs lens %d %d %d, cutting to %d %d %d" % (len(args.epoch_loss), len(args.epoch_cluster_dist), len(args.epoch_acc), next_epoch, next_epoch, next_epoch + 1)) args.epoch_loss = args.epoch_loss[:next_epoch] args.epoch_assess_cluster_loss = args.epoch_assess_cluster_loss[:next_epoch] args.epoch_cluster_dist = args.epoch_cluster_dist[:next_epoch] args.epoch_acc = args.epoch_acc[:(next_epoch + 1)] if not hasattr(args, "if_stl_dont_use_unlabelled"): args.if_stl_dont_use_unlabelled = False else: args.epoch_acc = [] args.epoch_assess_cluster_loss = [] args.epoch_cluster_dist = [] args.epoch_loss = [] # train loss args.epoch_distribution = [] args.epoch_centroid_min = [] args.epoch_centroid_max = [] next_epoch = 0 if not args.find_data_stats: print("args/config:") print(config_to_str(args)) sys.stdout.flush() # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) # losses and acc fig, axarr = plt.subplots(4, sharex=False, figsize=(20, 20)) # distr distr_fig, distr_ax = plt.subplots(3, sharex=False, figsize=(20, 20)) # Data --------------------------------------------------------------------- if args.dataset == "MNIST": assert (not args.sobel) args.input_ch = 1 else: if args.sobel: args.input_ch = 2 else: args.input_ch = 3 # preprocessing of data tra = [] tra_test = [] if args.rand_crop_sz != -1: tra += [transforms.RandomCrop(args.rand_crop_sz)] tra_test += [transforms.CenterCrop(args.rand_crop_sz)] tra += [transforms.Resize(args.input_sz)] tra_test += [transforms.Resize(args.input_sz)] args.data_mean = None # toggled on in cluster_assign args.data_std = None if args.normalize and (not args.find_data_stats): data_mean, data_std = _DATASET_NORM[args.dataset] args.data_mean = data_mean args.data_std = data_std normalize = transforms.Normalize(mean=args.data_mean, std=args.data_std) tra.append(normalize) tra_test.append(normalize) # actual augmentation here if not (args.dataset == "MNIST"): tra += [transforms.RandomHorizontalFlip(), transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.125) ] else: print("skipping horizontal flipping and jitter") tra += [transforms.ToTensor()] tra_test += [transforms.ToTensor()] tra = transforms.Compose(tra) tra_test = transforms.Compose(tra_test) # load the data dataset, dataloader, test_dataset, test_dataloader = make_data(args, tra, tra_test) if args.find_data_stats: print(args.dataset) print("train dataset mean, std: %s, %s" % compute_data_stats(dataloader, len(dataset))) print("test dataset mean, std: %s, %s" % compute_data_stats(test_dataloader, len(test_dataset))) exit(0) # Model -------------------------------------------------------------------- # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) sys.stdout.flush() model = models.__dict__[args.arch](sobel=args.sobel, out=args.k, input_sp_sz=args.input_sz, input_ch=args.input_ch) fd = model.dlen # model.features = 
torch.nn.DataParallel(model.features) model.cuda() cudnn.benchmark = True # create optimizer # top layer not created at this point! assert (model.top_layer is None) optimizer = torch.optim.Adam( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, # momentum=args.momentum, # weight_decay=10**args.wd, ) if args.resume: # remove top_layer parameters from checkpoint checkpoint = torch.load(os.path.join(old_args.out_dir, "%s.pytorch" % args.resume_mode)) for key in checkpoint['state_dict']: if 'top_layer' in key: del checkpoint['state_dict'][key] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) # after optimiser loading done, add a top layer # model.make_top_layer() # define loss function criterion = nn.CrossEntropyLoss().cuda() # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.k) if (not args.resume) or args.just_analyse: print("Doing some assessment") sys.stdout.flush() acc, distribution, centroid_min_max, assess_cluster_loss = \ assess_acc(args, test_dataset, test_dataloader, model, len(test_dataset)) print("got %f" % acc) sys.stdout.flush() if args.just_analyse: exit(0) args.epoch_acc.append(acc) args.epoch_assess_cluster_loss.append(assess_cluster_loss) args.epoch_distribution.append(list(distribution)) args.epoch_centroid_min.append(centroid_min_max[0]) args.epoch_centroid_max.append(centroid_min_max[1]) # Train -------------------------------------------------------------------- for epoch in range(next_epoch, args.total_epochs): # remove relu (getting features) # model.remove_feature_head_relu() # get the features for the whole dataset features = compute_features(args, dataloader, model, len(dataset)) # cluster the features clustering_loss = deepcluster.cluster(features, proc_feat=args.proc_feat, verbose=args.verbose) # assign pseudo-labels to make new dataset train_dataset = clustering.cluster_assign(args, deepcluster.images_lists, dataset, tra=tra) # uniformely sample per target sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch_sz, num_workers=args.workers, sampler=sampler, pin_memory=True, ) if epoch == next_epoch: print("fd length: %d" % fd) # prepare for training by reintroducing relu and resetting last layer # model.add_feature_head_relu() # model.reset_top_layer() # train network with clusters as pseudo-labels loss = train(train_dataloader, model, criterion, optimizer, epoch, per_batch=(epoch == next_epoch)) # assess --------------------------------------------------------------- acc, distribution, centroid_min_max, assess_cluster_loss = \ assess_acc(args, test_dataset, test_dataloader, model, len(test_dataset)) print("Model %d, epoch %d, cluster loss %f, train loss %f, acc %f " "time %s" % (args.model_ind, epoch, clustering_loss, loss, acc, datetime.now())) sys.stdout.flush() # update args is_best = False if acc > max(args.epoch_acc): is_best = True args.epoch_acc.append(acc) args.epoch_assess_cluster_loss.append(assess_cluster_loss) args.epoch_loss.append(loss) args.epoch_cluster_dist.append(clustering_loss) args.epoch_distribution.append(distribution) args.epoch_centroid_min.append(centroid_min_max[0]) args.epoch_centroid_max.append(centroid_min_max[1]) # draw graphs and save axarr[0].clear() axarr[0].plot(args.epoch_acc) axarr[0].set_title("Acc") axarr[1].clear() axarr[1].plot(args.epoch_loss) axarr[1].set_title("Training loss") axarr[2].clear() 
axarr[2].plot(args.epoch_cluster_dist) axarr[2].set_title("Cluster distance (train, k)") axarr[3].clear() axarr[3].plot(args.epoch_assess_cluster_loss) axarr[3].set_title("Cluster distance (assess, gt_k)") distr_ax[0].clear() epoch_distribution = np.array(args.epoch_distribution) for gt_c in xrange(args.gt_k): distr_ax[0].plot(epoch_distribution[:, gt_c]) distr_ax[0].set_title("Prediction distribution") distr_ax[1].clear() distr_ax[1].plot(args.epoch_centroid_min) distr_ax[1].set_title("Centroid avg-of-abs: min") distr_ax[2].clear() distr_ax[2].plot(args.epoch_centroid_max) distr_ax[2].set_title("Centroid avg-of-abs: max") # save ----------------------------------------------------------------- # graphs fig.canvas.draw_idle() fig.savefig(os.path.join(args.out_dir, "plots.png")) distr_fig.canvas.draw_idle() distr_fig.savefig(os.path.join(args.out_dir, "distribution.png")) # model if epoch % args.checkpoint_granularity == 0: torch.save({'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}, os.path.join(args.out_dir, "latest.pytorch")) args.epoch = epoch # last saved checkpoint if is_best: torch.save({'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}, os.path.join(args.out_dir, "best.pytorch")) args.best_epoch = epoch # args with open(os.path.join(args.out_dir, "config.pickle"), 'w') as outfile: pickle.dump(args, outfile) with open(os.path.join(args.out_dir, "config.txt"), "w") as text_file: text_file.write("%s" % args)
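Several of the resume blocks in these scripts delete top_layer entries from checkpoint['state_dict'] while iterating over it, which raises a RuntimeError as soon as a key is removed; the echogram variants already copy the keys first. Below is a small sketch of the safer pattern; the helper name is illustrative, and it assumes model.top_layer is currently None, as in the scripts above.

import torch


def load_backbone_checkpoint(model, optimizer, path):
    """Restore weights and optimizer state but drop the epoch-specific clustering head."""
    checkpoint = torch.load(path)
    # copy the keys before deleting, so iteration stays valid
    for key in list(checkpoint['state_dict'].keys()):
        if 'top_layer' in key:
            del checkpoint['state_dict'][key]
    # assumes model.top_layer is None, so the remaining keys match exactly
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return checkpoint.get('epoch', 0)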
for epoch in range(epochs):
    end = time.time()

    # remove head
    model.top_layer_class = None
    model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])

    ###########################################################################
    # if applying semi-supervised learning => stage 1: unsupervised part
    features_unlabel = compute_features(unlabel_loader, model,
                                        len(unlabel_dataset),
                                        dataset_type='non-supervised')
    clustering_loss_unlabel = deepcluster.cluster(features_unlabel,
                                                  verbose=verbose)

    cluster_training_dataset = clustering.cluster_assign(
        deepcluster.images_lists,
        unlabel_dataset.imgs,
        dataset_type='non-supervised')
    unlabel_sampler = UnifLabelSampler(
        int(reassign * len(cluster_training_dataset)),
        deepcluster.images_lists)
    cluster_dataloader = torch.utils.data.DataLoader(cluster_training_dataset,
                                                     batch_size=batch_size * C,
                                                     num_workers=12,
                                                     sampler=unlabel_sampler,
                                                     pin_memory=True)

    mlp = list(model.classifier.children())
    mlp.append(nn.ReLU(inplace=True).cuda())
    model.classifier = nn.Sequential(*mlp)
    model.top_layer_class = nn.Linear(fd, len(deepcluster.images_lists))
    model.top_layer_class.weight.data.normal_(0, 0.01)
    model.top_layer_class.bias.data.zero_()