def train():
    g_exit = GracefulExit()
    timestamp = datetime.datetime.utcnow().strftime(TIMESTAMP_FORMAT)
    logger = Logger(ENV_NAME, timestamp)
    env = gym.make(ENV_NAME)
    dim_obs = env.observation_space.shape[0] + 1
    dim_act = env.action_space.shape[0]
    scaler = VecScaler(dim_obs)
    rec_dir = os.path.join(REC_DIR, ENV_NAME, timestamp)
    env = gym.wrappers.Monitor(env, rec_dir, force=True)
    agent = PPO(dim_obs, dim_act, GAMMA, LAMBDA, CLIP_RANGE, LR_POLICY,
                LR_VALUE_F, logger)
    run_batch(env, agent.policy, 5, scaler)
    episode = 0
    while episode < NUM_EPISODES:
        batch_size = min(MAX_BATCH, NUM_EPISODES - episode)
        trajectories, steps, mean_return = run_batch(env, agent.policy,
                                                     batch_size, scaler)
        episode += batch_size
        logger.log({'_time': datetime.datetime.utcnow().strftime(TIMESTAMP_FORMAT),
                    '_episode': episode,
                    'steps': steps,
                    '_mean_return': mean_return})
        agent.update(trajectories)
        logger.write()
        if g_exit.exit:
            break
    agent.close()
    logger.close()
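# Note (illustrative, not from the original source): train() above relies on a
# GracefulExit helper that flips a flag when SIGINT/SIGTERM arrive, so the loop
# can finish the current batch before shutting down. A minimal sketch of such a
# helper, assuming only the standard `signal` module, could look like this:
import signal

class GracefulExit:
    def __init__(self):
        self.exit = False
        signal.signal(signal.SIGINT, self._handler)
        signal.signal(signal.SIGTERM, self._handler)

    def _handler(self, signum, frame):
        # Only mark the flag; train() checks it once per batch and exits cleanly.
        self.exit = True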
def __setitem__(self, name: 'str', obj: 'Object'):
    """Add object.

    Raises
    ------
    KeyError
        The name is already in use.

    """
    if name in [row[Column.NAME.value] for row in self]:
        raise KeyError(name + " already names an object!")
    self.append([obj, obj.name, str(type(obj).__name__)])
    obj.update(self.window)
    Logger.log(LogLevel.INFO, str(obj))
def _on_ok(self, _):
    """Handle on_create_object_ok signal.

    Create object and return dialog if typed information is valid,
    else repeat form.

    Raises
    ------
    RuntimeError
        If the object is not valid.

    """
    try:
        self._wml_interpreter.validate_object(
            self.name, self._points_field.get_text())
        self._dialog.response(Gtk.ResponseType.OK)
    except RuntimeError as error:
        Logger.log(LogLevel.ERRO, error)
def __delitem__(self, name: 'str'):
    """Delete object.

    Raises
    ------
    KeyError
        The named object does not exist.

    """
    for row in self:
        if row[Column.NAME.value] == name:
            if self.window.name == name:
                raise KeyError("cannot remove window!")
            else:
                self.remove(row.iter)
                Logger.log(LogLevel.INFO, name + " has been removed!")
                return
    raise KeyError(name + " does not name an object!")
def _on_configure(self, wid: 'Gtk.Widget', evt: 'Gdk.EventConfigure'):
    """Handle on_configure signal.

    Create surface and paint it white.

    Notes
    -----
    This signal is invoked during setup and every time the drawing
    area resizes.

    """
    win = wid.get_window()
    width = wid.get_allocated_width()
    height = wid.get_allocated_height()
    self._surface = win.create_similar_surface(cairo.CONTENT_COLOR,
                                               width, height)
    self._resolution = (width - 20, height - 20)
    Logger.log(LogLevel.INFO,
               "viewport.config() at ({},{})".format(width, height))
    self.clear()
def add(self, **kwargs):
    """Attempt to add object to ObjectStore."""
    REQUIRED_PARAMS = {
        "Point": [],
        "Line": [],
        "Wireframe": ['faces'],
        "Curve": ['bmatu'],
        "Surface": ['bmatu', 'bmatv'],
    }
    try:
        name = kwargs['name']
        points = kwargs['points']
        color = kwargs['color']
        obj_type = kwargs['obj_type']
        for param in REQUIRED_PARAMS[obj_type]:
            if param not in kwargs:
                raise ValueError
    except (KeyError, ValueError):
        # A missing required key (KeyError) is treated the same as a bad value.
        Logger.log(LogLevel.ERRO,
                   "Attempting to create object without proper parameters")
        return
    call_constructor = {
        "Point": lambda: Point(name, points, color),
        "Line": lambda: Line(name, points, color),
        "Wireframe": lambda: Wireframe(name, points, kwargs['faces'], color),
        "Curve": lambda: Curve(name, points, kwargs['bmatu'], color),
        "Surface": lambda: Surface(name, points, kwargs['bmatu'],
                                   kwargs['bmatv'], color),
    }
    self._obj_store[name] = call_constructor[obj_type]()
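# Usage sketch (illustrative only; the point/color formats and the `obj_store`
# name are assumptions, not taken from the original project): add() expects all
# object parameters as keyword arguments and logs an error instead of raising
# when a required one is missing.
# obj_store.add(name="p1", points=[(0, 0, 0)], color=(0, 0, 0), obj_type="Point")
# obj_store.add(name="w1", points=[(0, 0, 0), (1, 0, 0), (0, 1, 0)],
#               faces=[(0, 1, 2)], color=(255, 0, 0), obj_type="Wireframe")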
class ComputeUnit(MemSysComponent):
    def __init__(self, sys, clk, user_id, logger_on, lower_component):
        super().__init__("Compute Unit " + str(user_id), clk, sys,
                         lower_component)
        self.logger = Logger(self.name, logger_on, self.sys)
        self.waiting_mem = set()
        self.store_queue = []

    def load(self, address):
        self.logger.log("Load " + str(hex(address)))
        self.lower_load(address)
        self.waiting_mem.add(address)
        self.is_idle = False

    def store(self, address):
        self.logger.log("Store " + str(hex(address)))
        self.lower_store(address)

    def complete_load(self, address):
        self.logger.log("Completing load: " + str(address))
        cache_line = address >> int(
            math.log(self.sys.get_cache_line_size()) / math.log(2))
        clear_addrs = []
        for waiting_address in self.waiting_mem:
            waiting_cache_line = waiting_address >> int(
                math.log(self.sys.get_cache_line_size()) / math.log(2))
            if waiting_cache_line == cache_line:
                self.logger.log("Data from " + str(hex(waiting_address)) +
                                " available.")
                clear_addrs.append(waiting_address)
        for address in clear_addrs:
            if address in self.waiting_mem:
                self.waiting_mem.remove(address)
        if len(self.waiting_mem) == 0:
            self.is_idle = True

    def advance(self, cycles):
        self.clk += cycles
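# Note (illustrative, not part of the simulator): complete_load() above maps a
# byte address to its cache line by shifting out log2(line_size) offset bits,
# so every address inside one line resolves to the same line index. For a
# 64-byte line, 0x1000 through 0x103F all collapse to line 0x40:
#   0x1000 >> int(math.log(64) / math.log(2))  ->  0x40
#   0x103F >> int(math.log(64) / math.log(2))  ->  0x40
# Using math.log2 (or a precomputed offset-bit count, as Cache.__init__ does
# further below) avoids repeating the floating-point division on every access.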
def main(args): # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) model = models.__dict__[args.arch](sobel=args.sobel) fd = int(model.top_layer.weight.size()[1]) model.top_layer = None model.features = torch.nn.DataParallel(model.features) model.cuda() cudnn.benchmark = True print('CNN builded.') # create optimizer optimizer = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=10**args.wd, ) print('Optimizer created.') # define loss function criterion = nn.CrossEntropyLoss().cuda() # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] # remove top_layer parameters from checkpoint for key in list(checkpoint['state_dict']): if 'top_layer' in key: del checkpoint['state_dict'][key] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) # creating checkpoint repo exp_check = os.path.join(args.exp, 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, 'clusters')) epochs_log = Logger(os.path.join(args.exp, 'epochs300')) # Loading and preprocessing of data: custom Rescale and ToTensor transformations for VidDataset. # VidDataset has a box_frame, which is a pandas Dataframe containing images path an their bb coordinates. # Each VidDataset sample is a dict formed by a tensor (the image) and crop_coord (bb xmin, xmax, ymin, ymax). # If a pickled dataset is passed, it will be deserialized and used, else it will be normally loaded. # It is useful when we want to preprocess a dataset. 
print('Start loading dataset...') end = time.time() if args.dataset_pkl: dataset = deserialize_obj(args.dataset_pkl) # I will never use labels in deepcluster dataset.vid_labels = None else: tra = [preprocessing.Rescale((224, 224)), preprocessing.ToTensor()] dataset = VidDataset(xml_annotations_dir=args.ann, root_dir=args.data, transform=transforms.Compose(tra)) dataset.imgs = dataset.imgs[0::args.load_step] dataset.samples = dataset.samples[0::args.load_step] print('Load dataset: {0:.2f} s'.format(time.time() - end)) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch, num_workers=args.workers, pin_memory=True) # calculate batch size sum (better clean-up data with clean_data.py) dataset_len = 0 if not args.dataset_pkl: dataloader.collate_fn = my_collate for s in dataloader: dataset_len += len(s['image']) else: dataset_len = len(dataset.imgs) print("Dataset final dimension: ", dataset_len) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): end = time.time() # remove head model.top_layer = None model.classifier = nn.Sequential( *list(model.classifier.children())[:-1]) # get the features for the whole dataset hardcoded dataset dim for step=5 features = compute_features(dataloader, model, args.load_step, dataset_len) # cluster the features if args.verbose: print('Cluster the features') clustering_loss = deepcluster.cluster(features, verbose=args.verbose) # assign pseudo-labels if args.verbose: print('Assign pseudo labels') train_dataset = clustering.cluster_assign(deepcluster.images_lists, dataset.imgs) # uniformly sample per target sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, num_workers=args.workers, sampler=sampler, pin_memory=True, ) if not args.dataset_pkl: train_dataloader.collate_fn = my_collate # set last fully connected layer mlp = list(model.classifier.children()) mlp.append(nn.ReLU(inplace=True).cuda()) model.classifier = nn.Sequential(*mlp) model.top_layer = nn.Linear(fd, len(deepcluster.images_lists)) model.top_layer.weight.data.normal_(0, 0.01) model.top_layer.bias.data.zero_() model.top_layer.cuda() # train network with clusters as pseudo-labels end = time.time() loss = train(train_dataloader, model, criterion, optimizer, epoch) # print log if args.verbose: print('###### Epoch [{0}] ###### \n' 'Time: {1:.3f} s\n' 'Clustering loss: {2:.3f} \n' 'ConvNet loss: {3:.3f}'.format(epoch, time.time() - end, clustering_loss, loss)) epoch_log = [epoch, time.time() - end, clustering_loss, loss] try: nmi = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.images_lists), clustering.arrange_clustering(cluster_log.data[-1])) print('NMI against previous assignment: {0:.3f}'.format(nmi)) epoch_log.append(nmi) except IndexError: pass print('####################### \n') # save running checkpoint torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(args.exp, 'checkpoint.pth.tar')) # save cluster assignments cluster_log.log(deepcluster.images_lists) epochs_log.log(epoch_log)
def main(args): # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu") print(device) criterion = nn.CrossEntropyLoss() cluster_log = Logger(os.path.join(args.exp, '../..', 'clusters.pickle')) # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) ''' ########################################## ########################################## # Model definition ########################################## ##########################################''' model = models.__dict__[args.arch](bn=True, num_cluster=args.nmb_cluster, num_category=args.nmb_category) fd = int(model.cluster_layer[0].weight.size() [1]) # due to transpose, fd is input dim of W (in dim, out dim) model.cluster_layer = None model.category_layer = None model.features = torch.nn.DataParallel(model.features) model = model.double() model.to(device) cudnn.benchmark = True if args.optimizer is 'Adam': print('Adam optimizer: conv') optimizer_body = torch.optim.Adam( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr_Adam, betas=(0.9, 0.999), weight_decay=10**args.wd, ) else: print('SGD optimizer: conv') optimizer_body = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr_SGD, momentum=args.momentum, weight_decay=10**args.wd, ) ''' ############### ############### category_layer ############### ############### ''' model.category_layer = nn.Sequential( nn.Linear(fd, args.nmb_category), nn.Softmax(dim=1), ) model.category_layer[0].weight.data.normal_(0, 0.01) model.category_layer[0].bias.data.zero_() model.category_layer = model.category_layer.double() model.category_layer.to(device) if args.optimizer is 'Adam': print('Adam optimizer: conv') optimizer_category = torch.optim.Adam( filter(lambda x: x.requires_grad, model.category_layer.parameters()), lr=args.lr_Adam, betas=(0.9, 0.999), weight_decay=10**args.wd, ) else: print('SGD optimizer: conv') optimizer_category = torch.optim.SGD( filter(lambda x: x.requires_grad, model.category_layer.parameters()), lr=args.lr_SGD, momentum=args.momentum, weight_decay=10**args.wd, ) ''' ######################################## ######################################## Create echogram sampling index ######################################## ########################################''' print('Sample echograms.') dataset_cp, dataset_semi = sampling_echograms_full(args) dataloader_cp = torch.utils.data.DataLoader(dataset_cp, shuffle=False, batch_size=args.batch, num_workers=args.workers, drop_last=False, pin_memory=True) dataloader_semi = torch.utils.data.DataLoader(dataset_semi, shuffle=False, batch_size=args.batch, num_workers=args.workers, drop_last=False, pin_memory=True) dataset_test = sampling_echograms_test(args) dataloader_test = torch.utils.data.DataLoader(dataset_test, shuffle=False, batch_size=args.batch, num_workers=args.workers, drop_last=False, pin_memory=True) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster, args.pca) # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] # remove top located layer parameters from checkpoint copy_checkpoint_state_dict = checkpoint['state_dict'].copy() for key in list(copy_checkpoint_state_dict): if 'cluster_layer' in key: del copy_checkpoint_state_dict[key] # if 
'category_layer' in key: # del copy_checkpoint_state_dict[key] checkpoint['state_dict'] = copy_checkpoint_state_dict model.load_state_dict(checkpoint['state_dict']) optimizer_body.load_state_dict(checkpoint['optimizer_body']) optimizer_category.load_state_dict( checkpoint['optimizer_category']) category_save = os.path.join(args.exp, '../..', 'category_layer.pth.tar') if os.path.isfile(category_save): category_layer_param = torch.load(category_save) model.category_layer.load_state_dict(category_layer_param) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) # creating checkpoint repo exp_check = os.path.join(args.exp, '../..', 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) ''' ####################### ####################### PRETRAIN: commented ####################### #######################''' # if args.start_epoch < args.pretrain_epoch: # if os.path.isfile(os.path.join(args.exp, '..', 'pretrain_loss_collect.pickle')): # with open(os.path.join(args.exp, '..', 'pretrain_loss_collect.pickle'), "rb") as f: # pretrain_loss_collect = pickle.load(f) # else: # pretrain_loss_collect = [[], [], [], [], []] # print('Start pretraining with %d percent of the dataset from epoch %d/(%d)' # % (int(args.semi_ratio * 100), args.start_epoch, args.pretrain_epoch)) # model.cluster_layer = None # # for epoch in range(args.start_epoch, args.pretrain_epoch): # with torch.autograd.set_detect_anomaly(True): # pre_loss, pre_accuracy = supervised_train(loader=dataloader_semi, # model=model, # crit=criterion, # opt_body=optimizer_body, # opt_category=optimizer_category, # epoch=epoch, device=device, args=args) # test_loss, test_accuracy = test(dataloader_test, model, criterion, device, args) # # # print log # if args.verbose: # print('###### Epoch [{0}] ###### \n' # 'PRETRAIN tr_loss: {1:.3f} \n' # 'TEST loss: {2:.3f} \n' # 'PRETRAIN tr_accu: {3:.3f} \n' # 'TEST accu: {4:.3f} \n'.format(epoch, pre_loss, test_loss, pre_accuracy, test_accuracy)) # pretrain_loss_collect[0].append(epoch) # pretrain_loss_collect[1].append(pre_loss) # pretrain_loss_collect[2].append(test_loss) # pretrain_loss_collect[3].append(pre_accuracy) # pretrain_loss_collect[4].append(test_accuracy) # # torch.save({'epoch': epoch + 1, # 'arch': args.arch, # 'state_dict': model.state_dict(), # 'optimizer_body': optimizer_body.state_dict(), # 'optimizer_category': optimizer_category.state_dict(), # }, # os.path.join(args.exp, '..', 'checkpoint.pth.tar')) # torch.save(model.category_layer.state_dict(), os.path.join(args.exp, '..', 'category_layer.pth.tar')) # # with open(os.path.join(args.exp, '..', 'pretrain_loss_collect.pickle'), "wb") as f: # pickle.dump(pretrain_loss_collect, f) # # if (epoch+1) % args.checkpoints == 0: # path = os.path.join( # args.exp, '..', # 'checkpoints', # 'checkpoint_' + str(epoch) + '.pth.tar', # ) # if args.verbose: # print('Save checkpoint at: {0}'.format(path)) # torch.save({'epoch': epoch + 1, # 'arch': args.arch, # 'state_dict': model.state_dict(), # 'optimizer_body': optimizer_body.state_dict(), # 'optimizer_category': optimizer_category.state_dict(), # }, path) if os.path.isfile(os.path.join(args.exp, '../..', 'loss_collect.pickle')): with open(os.path.join(args.exp, '../..', 'loss_collect.pickle'), "rb") as f: loss_collect = pickle.load(f) else: loss_collect = [[], [], [], [], [], [], []] if os.path.isfile(os.path.join(args.exp, '../..', 'nmi_collect.pickle')): with open(os.path.join(args.exp, 
'../..', 'nmi_collect.pickle'), "rb") as ff: nmi_save = pickle.load(ff) else: nmi_save = [] ''' ####################### ####################### MAIN TRAINING ####################### #######################''' for epoch in range(args.start_epoch, args.epochs): end = time.time() model.classifier = nn.Sequential( *list(model.classifier.children()) [:-1]) # remove ReLU at classifier [:-1] model.cluster_layer = None model.category_layer = None ''' ####################### ####################### PSEUDO-LABEL GENERATION ####################### ####################### ''' print('Cluster the features') features_train, input_tensors_train, labels_train = compute_features( dataloader_cp, model, len(dataset_cp), device=device, args=args) clustering_loss, pca_features = deepcluster.cluster( features_train, verbose=args.verbose) nan_location = np.isnan(pca_features) inf_location = np.isinf(pca_features) if (not np.allclose(nan_location, 0)) or (not np.allclose( inf_location, 0)): print('PCA: Feature NaN or Inf found. Nan count: ', np.sum(nan_location), ' Inf count: ', np.sum(inf_location)) print('Skip epoch ', epoch) torch.save(pca_features, 'tr_pca_NaN_%d.pth.tar' % epoch) torch.save(features_train, 'tr_feature_NaN_%d.pth.tar' % epoch) continue print('Assign pseudo labels') size_cluster = np.zeros(len(deepcluster.images_lists)) for i, _list in enumerate(deepcluster.images_lists): size_cluster[i] = len(_list) print('size in clusters: ', size_cluster) img_label_pair_train = zip_img_label(input_tensors_train, labels_train) train_dataset = clustering.cluster_assign( deepcluster.images_lists, img_label_pair_train) # Reassigned pseudolabel # uniformly sample per target sampler_train = UnifLabelSampler(int(len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, shuffle=False, num_workers=args.workers, sampler=sampler_train, pin_memory=True, ) ''' #################################################################### #################################################################### TRSNSFORM MODEL FOR SELF-SUPERVISION // SEMI-SUPERVISION #################################################################### #################################################################### ''' # Recover classifier with ReLU (that is not used in clustering) mlp = list(model.classifier.children( )) # classifier that ends with linear(512 * 128). 
No ReLU at the end mlp.append(nn.ReLU(inplace=True).to(device)) model.classifier = nn.Sequential(*mlp) model.classifier.to(device) '''SELF-SUPERVISION (PSEUDO-LABELS)''' model.category_layer = None model.cluster_layer = nn.Sequential( nn.Linear(fd, args.nmb_cluster), # nn.Linear(4096, num_cluster), nn.Softmax( dim=1 ), # should be removed and replaced by ReLU for category_layer ) model.cluster_layer[0].weight.data.normal_(0, 0.01) model.cluster_layer[0].bias.data.zero_() model.cluster_layer = model.cluster_layer.double() model.cluster_layer.to(device) ''' train network with clusters as pseudo-labels ''' with torch.autograd.set_detect_anomaly(True): pseudo_loss, semi_loss, semi_accuracy = semi_train( train_dataloader, dataloader_semi, model, fd, criterion, optimizer_body, optimizer_category, epoch, device=device, args=args) # save checkpoint if (epoch + 1) % args.checkpoints == 0: path = os.path.join( args.exp, '../..', 'checkpoints', 'checkpoint_' + str(epoch) + '.pth.tar', ) if args.verbose: print('Save checkpoint at: {0}'.format(path)) torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer_body': optimizer_body.state_dict(), 'optimizer_category': optimizer_category.state_dict(), }, path) ''' ############## ############## # TEST phase ############## ############## ''' test_loss, test_accuracy, test_pred, test_label = test( dataloader_test, model, criterion, device, args) '''Save prediction of the test set''' if (epoch % args.save_epoch == 0): with open( os.path.join(args.exp, '../..', 'sup_epoch_%d_te.pickle' % epoch), "wb") as f: pickle.dump([test_pred, test_label], f) if args.verbose: print('###### Epoch [{0}] ###### \n' 'Time: {1:.3f} s\n' 'Pseudo tr_loss: {2:.3f} \n' 'SEMI tr_loss: {3:.3f} \n' 'TEST loss: {4:.3f} \n' 'Clustering loss: {5:.3f} \n' 'SEMI accu: {6:.3f} \n' 'TEST accu: {7:.3f} \n'.format(epoch, time.time() - end, pseudo_loss, semi_loss, test_loss, clustering_loss, semi_accuracy, test_accuracy)) try: nmi = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.images_lists), clustering.arrange_clustering(cluster_log.data[-1])) nmi_save.append(nmi) print('NMI against previous assignment: {0:.3f}'.format(nmi)) with open( os.path.join(args.exp, '../..', 'nmi_collect.pickle'), "wb") as ff: pickle.dump(nmi_save, ff) except IndexError: pass print('####################### \n') # save cluster assignments cluster_log.log(deepcluster.images_lists) # save running checkpoint torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer_body': optimizer_body.state_dict(), 'optimizer_category': optimizer_category.state_dict(), }, os.path.join(args.exp, '../..', 'checkpoint.pth.tar')) torch.save(model.category_layer.state_dict(), os.path.join(args.exp, '../..', 'category_layer.pth.tar')) loss_collect[0].append(epoch) loss_collect[1].append(pseudo_loss) loss_collect[2].append(semi_loss) loss_collect[3].append(clustering_loss) loss_collect[4].append(test_loss) loss_collect[5].append(semi_accuracy) loss_collect[6].append(test_accuracy) with open(os.path.join(args.exp, '../..', 'loss_collect.pickle'), "wb") as f: pickle.dump(loss_collect, f) ''' ############################ ############################ # PSEUDO-LABEL GEN: Test set ############################ ############################ ''' model.classifier = nn.Sequential( *list(model.classifier.children()) [:-1]) # remove ReLU at classifier [:-1] model.cluster_layer = None model.category_layer = None print('TEST set: Cluster the features') 
features_te, input_tensors_te, labels_te = compute_features( dataloader_test, model, len(dataset_test), device=device, args=args) clustering_loss_te, pca_features_te = deepcluster.cluster( features_te, verbose=args.verbose) mlp = list(model.classifier.children( )) # classifier that ends with linear(512 * 128). No ReLU at the end mlp.append(nn.ReLU(inplace=True).to(device)) model.classifier = nn.Sequential(*mlp) model.classifier.to(device) nan_location = np.isnan(pca_features_te) inf_location = np.isinf(pca_features_te) if (not np.allclose(nan_location, 0)) or (not np.allclose( inf_location, 0)): print('PCA: Feature NaN or Inf found. Nan count: ', np.sum(nan_location), ' Inf count: ', np.sum(inf_location)) print('Skip epoch ', epoch) torch.save(pca_features_te, 'te_pca_NaN_%d.pth.tar' % epoch) torch.save(features_te, 'te_feature_NaN_%d.pth.tar' % epoch) continue # save patches per epochs cp_epoch_out = [ features_te, deepcluster.images_lists, deepcluster.images_dist_lists, input_tensors_te, labels_te ] if (epoch % args.save_epoch == 0): with open( os.path.join(args.exp, '../..', 'cp_epoch_%d_te.pickle' % epoch), "wb") as f: pickle.dump(cp_epoch_out, f) with open( os.path.join(args.exp, '../..', 'pca_epoch_%d_te.pickle' % epoch), "wb") as f: pickle.dump(pca_features_te, f)
def main(args): # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu") print(device) # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) model = models.__dict__[args.arch](sobel=False, bn=True, out=args.nmb_cluster) fd = int(model.top_layer[0].weight.size() [1]) # due to transpose, fd is input dim of W (in dim, out dim) model.top_layer = None model.features = torch.nn.DataParallel(model.features) model = model.double() model.to(device) cudnn.benchmark = True # create optimizer # optimizer = torch.optim.SGD( # filter(lambda x: x.requires_grad, model.parameters()), # lr=args.lr, # momentum=args.momentum, # weight_decay=10**args.wd, # ) optimizer = torch.optim.Adam( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, betas=(0.5, 0.99), weight_decay=10**args.wd, ) criterion = nn.CrossEntropyLoss() # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] # remove top_layer parameters from checkpoint copy_checkpoint_state_dict = checkpoint['state_dict'].copy() for key in list(copy_checkpoint_state_dict): if 'top_layer' in key: del copy_checkpoint_state_dict[key] checkpoint['state_dict'] = copy_checkpoint_state_dict model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) # creating checkpoint repo exp_check = os.path.join(args.exp, '../checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, 'clusters.pickle')) # load dataset (initial echograms) window_size = [args.window_dim, args.window_dim] # # Create echogram sampling index print('Sample echograms.') end = time.time() dataset_cp = sampling_echograms_full(args) dataloader_cp = torch.utils.data.DataLoader(dataset_cp, shuffle=False, batch_size=args.batch, num_workers=args.workers, drop_last=False, pin_memory=True) if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end)) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster, args.pca) # deepcluster = clustering.Kmeans(no.cluster, dim.pca) loss_collect = [[], [], []] # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): # remove head model.top_layer = None model.classifier = nn.Sequential(*list(model.classifier.children( ))) # End with linear(512*128) in original vgg) # ReLU in .classfier() will follow later # get the features for the whole dataset features_train, input_tensors_train, labels_train = compute_features( dataloader_cp, model, len(dataset_cp), device=device, args=args) # cluster the features print('Cluster the features') end = time.time() clustering_loss = deepcluster.cluster(features_train, verbose=args.verbose) print('Cluster time: {0:.2f} s'.format(time.time() - end)) # save patches per epochs if ((epoch + 1) % args.save_epoch == 0): end = time.time() cp_epoch_out = [ features_train, deepcluster.images_lists, deepcluster.images_dist_lists, input_tensors_train, labels_train ] with open("./cp_epoch_%d.pickle" % epoch, "wb") as f: pickle.dump(cp_epoch_out, f) print('Feature save time: {0:.2f} 
s'.format(time.time() - end)) # assign pseudo-labels print('Assign pseudo labels') size_cluster = np.zeros(len(deepcluster.images_lists)) for i, _list in enumerate(deepcluster.images_lists): size_cluster[i] = len(_list) print('size in clusters: ', size_cluster) img_label_pair_train = zip_img_label(input_tensors_train, labels_train) train_dataset = clustering.cluster_assign( deepcluster.images_lists, img_label_pair_train) # Reassigned pseudolabel # ((img[imgidx], label[imgidx]), pseudolabel, imgidx) # N = len(imgidx) # uniformly sample per target sampler_train = UnifLabelSampler(int(len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, shuffle=False, num_workers=args.workers, sampler=sampler_train, pin_memory=True, ) # set last fully connected layer mlp = list(model.classifier.children() ) # classifier that ends with linear(512 * 128) mlp.append(nn.ReLU().to(device)) model.classifier = nn.Sequential(*mlp) model.top_layer = nn.Sequential( nn.Linear(fd, args.nmb_cluster), nn.Softmax(dim=1), ) # model.top_layer = nn.Linear(fd, args.nmb_cluster) model.top_layer[0].weight.data.normal_(0, 0.01) model.top_layer[0].bias.data.zero_() model.top_layer = model.top_layer.double() model.top_layer.to(device) # train network with clusters as pseudo-labels end = time.time() with torch.autograd.set_detect_anomaly(True): loss, tr_epoch_out = train(train_dataloader, model, criterion, optimizer, epoch, device=device, args=args) print('Train time: {0:.2f} s'.format(time.time() - end)) if ((epoch + 1) % args.save_epoch == 0): end = time.time() with open("./tr_epoch_%d.pickle" % epoch, "wb") as f: pickle.dump(tr_epoch_out, f) print('Save train time: {0:.2f} s'.format(time.time() - end)) # Accuracy with training set (output vs. pseudo label) accuracy_tr = np.mean( tr_epoch_out[1] == np.argmax(tr_epoch_out[2], axis=1)) # print log if args.verbose: print('###### Epoch [{0}] ###### \n' 'Time: {1:.3f} s\n' 'Clustering loss: {2:.3f} \n' 'ConvNet tr_loss: {3:.3f} \n' 'ConvNet tr_acc: {4:.3f} \n'.format(epoch, time.time() - end, clustering_loss, loss, accuracy_tr)) try: nmi = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.images_lists), clustering.arrange_clustering(cluster_log.data[-1])) print('NMI against previous assignment: {0:.3f}'.format(nmi)) except IndexError: pass print('####################### \n') # save running checkpoint torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(args.exp, 'checkpoint.pth.tar')) loss_collect[0].append(epoch) loss_collect[1].append(loss) loss_collect[2].append(accuracy_tr) with open("./loss_collect.pickle", "wb") as f: pickle.dump(loss_collect, f) # save cluster assignments cluster_log.log(deepcluster.images_lists)
# data
train_data, val_data = load_data(data_config, exp_config['batch_size'])
eval_length = data_config['eval_length']

# model
model = Model(**model_config).to(0)

# optimizer
optimizer = AdamOptimizer(params=model.parameters(),
                          lr=exp_config['lr'],
                          grad_clip_value=exp_config['grad_clip_value'],
                          grad_clip_norm=exp_config['grad_clip_norm'])

# logger
logger_on = True
if logger_on:
    logger = Logger(exp_config, model_config, data_config)

# train / val loop
for epoch in range(exp_config['n_epochs']):
    print('Epoch:', epoch)
    if logger_on:
        logger.log(train(train_data, model, optimizer, eval_length), 'train')
        logger.log(validation(val_data, model, eval_length), 'val')
        logger.save(model)
    else:
        train(train_data, model, optimizer, eval_length)
        validation(val_data, model, eval_length)
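# Configuration sketch (keys inferred from the script above; the concrete
# values are assumptions for illustration, not taken from the original project):
# the driver only needs the entries it actually reads, e.g.
# exp_config  = {'batch_size': 32, 'lr': 1e-3, 'grad_clip_value': None,
#                'grad_clip_norm': 5.0, 'n_epochs': 100}
# data_config = {'eval_length': 25}   # plus whatever load_data() expects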
def main(): global args args = parser.parse_args() # Temporary directory used for downloaded models etc tmppth = tempfile.mkdtemp() print('Using temporary directory %s' % tmppth) #fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) best_prec1 = 0 # Checkpoint to be loaded from disc checkpointbasename = 'checkpoint_%d.pth.tar' % args.timepoint checkpointfn = os.path.join(tmppth, checkpointbasename) # Pull model from S3 s3 = boto3.resource('s3') try: s3fn = os.path.join(args.checkpointpath, checkpointbasename) print("Attempting s3 download from %s" % s3fn) s3.Bucket(args.checkpointbucket).download_file(s3fn, checkpointfn) except botocore.exceptions.ClientError as e: if e.response['Error']['Code'] == "404": print("The object does not exist.") else: raise # Prepare place for output linearclassfn = os.path.join( args.linearclasspath, "linearclass_time_%d_conv_%d" % (args.timepoint, args.conv)) print("Will write output to bucket %s, %s", args.linearclassbucket, linearclassfn) # Load model model = load_model(checkpointfn) model.cuda() cudnn.benchmark = True # Recover disc os.remove(checkpointfn) # freeze the features layers for param in model.features.parameters(): param.requires_grad = False # define loss function (criterion) and optimizer criterion = nn.CrossEntropyLoss().cuda() # data loading code traindir = os.path.join(args.data, 'train') valdir = os.path.join(args.data, 'val_in_folders') valdir_double = os.path.join(args.data, 'val_in_double_folders') valdir_list = [] # Load in AoA table if needed if args.aoaval: aoalist = pd.read_csv('matchingAoA_ImageNet_excel.csv') for index, row in aoalist.iterrows(): node = row['node'] aoa = float(row['aoa']) if not math.isnan(aoa): valdir_list.append({ 'node': node, 'pth': os.path.join(valdir_double, node), 'aoa': aoa }) else: print('Not found %s' % node) #valdir_list=valdir_list[:5] trim for testing print('Using %d validation categories for aoa' % len(valdir_list)) normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) if args.tencrops: transformations_val = [ transforms.Resize(256), transforms.TenCrop(224), transforms.Lambda(lambda crops: torch.stack( [normalize(transforms.ToTensor()(crop)) for crop in crops])), ] else: transformations_val = [ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize ] transformations_train = [ transforms.Resize(256), transforms.CenterCrop(256), transforms.RandomCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), normalize ] train_dataset = datasets.ImageFolder( traindir, transform=transforms.Compose(transformations_train)) val_dataset = datasets.ImageFolder( valdir, transform=transforms.Compose(transformations_val)) # Load up individual categories for AoA validation if args.aoaval: print("Loading individual categories for validation") val_list_dataset = [] val_list_loader = [] val_list_remap = [] for entry in valdir_list: val_list_dataset.append( datasets.ImageFolder( entry['pth'], transform=transforms.Compose(transformations_val))) val_list_loader.append( torch.utils.data.DataLoader(val_list_dataset[-1], batch_size=50, shuffle=False, num_workers=args.workers)) val_list_remap.append(train_dataset.classes.index(entry['node'])) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True) val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=int(args.batch_size / 2), shuffle=False, 
num_workers=args.workers) # logistic regression print("Setting up regression") reglog = RegLog(args.conv, len(train_dataset.classes)).cuda() optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, reglog.parameters()), args.lr, momentum=args.momentum, weight_decay=10**args.weight_decay) # create logs exp_log = os.path.join(tmppth, 'log') if not os.path.isdir(exp_log): os.makedirs(exp_log) loss_log = Logger(os.path.join(exp_log, 'loss_log')) prec1_log = Logger(os.path.join(exp_log, 'prec1')) prec5_log = Logger(os.path.join(exp_log, 'prec5')) # If savedmodel already exists, load this print("Looking for saved decoder") if args.toplayer_epochs: filename = "model_toplayer_epoch_%d.pth.tar" % (args.toplayer_epochs - 1) else: filename = 'model_best.pth.tar' savedmodelpth = os.path.join(tmppth, filename) s3_client = boto3.client('s3') try: # Try to download desired toplayer_epoch response = s3_client.download_file( args.linearclassbucket, os.path.join(linearclassfn, filename), savedmodelpth) print('Loading saved decoder %s (s3: %s)' % (savedmodelpth, os.path.join(linearclassfn, filename))) model_with_decoder = torch.load(savedmodelpth) reglog.load_state_dict(model_with_decoder['reglog_state_dict']) lastepoch = model_with_decoder['epoch'] except: try: # Fallback to last saved toplayer_epoch, which we'll use as a starting point response = s3_client.download_file( args.linearclassbucket, os.path.join(linearclassfn, 'model_best.pth.tar'), savedmodelpth) print('Loading best-so-far saved decoder %s (s3:%s)' % (savedmodelpth, os.path.join(linearclassfn, 'model_best.pth.tar'))) model_with_decoder = torch.load(savedmodelpth) print('Previous model epoch %d' % model_with_decoder['epoch']) # But check it isn't greater than desired stage before loading if model_with_decoder['epoch'] <= args.toplayer_epochs: lastepoch = model_with_decoder['epoch'] reglog.load_state_dict(model_with_decoder['reglog_state_dict']) else: print('Previous model epoch %d was past desired one %d' % (model_with_decoder['epoch'], args.toplayer_epochs)) lastepoch = 0 except: lastepoch = 0 print("Will run from epoch %d to epoch %d" % (lastepoch, args.toplayer_epochs - 1)) for epoch in range(lastepoch, args.toplayer_epochs): # Top layer epochs end = time.time() # train for one epoch train(train_loader, model, reglog, criterion, optimizer, epoch) # evaluate on validation set prec1, prec5, loss = validate(val_loader, model, reglog, criterion, target_remap=range(1000)) loss_log.log(loss) prec1_log.log(prec1) prec5_log.log(prec5) # remember best prec@1 and save checkpoint is_best = prec1 > best_prec1 best_prec1 = max(prec1, best_prec1) filename = 'model_toplayer_epoch_%d.pth.tar' % epoch modelfn = os.path.join(tmppth, filename) torch.save( { 'epoch': epoch + 1, 'arch': 'alexnet', 'state_dict': model.state_dict(), 'reglog_state_dict': reglog.state_dict(), # Also save decoding layers 'prec5': prec5, 'best_prec1': best_prec1, 'optimizer': optimizer.state_dict(), }, savedmodelpth) # Save output to check s3_client = boto3.client('s3') response = s3_client.upload_file(savedmodelpth, args.linearclassbucket, os.path.join(linearclassfn, filename)) for logfile in ['prec1', 'prec5', 'loss_log']: localfn = os.path.join(tmppth, 'log', logfile) response = s3_client.upload_file( localfn, args.linearclassbucket, os.path.join(linearclassfn, 'log', "%s_toplayer_epoch_%d" % (logfile, epoch))) if is_best: # Save output to check s3_client = boto3.client('s3') response = s3_client.upload_file( savedmodelpth, args.linearclassbucket, os.path.join(linearclassfn, 
'model_best.pth.tar')) for logfile in ['prec1', 'prec5', 'loss_log']: localfn = os.path.join(tmppth, 'log', logfile) response = s3_client.upload_file( localfn, args.linearclassbucket, os.path.join(linearclassfn, 'log', logfile)) # Tidy up for logfile in ['prec1', 'prec5', 'loss_log']: localfn = os.path.join(tmppth, 'log', logfile) os.remove(localfn) os.remove(savedmodelpth) if args.aoaval: # Validate individual categories, so loss can be compared to AoA # # To check weights loaded OK # # evaluate on validation set # prec1, prec5, loss = validate(val_loader, model, reglog, criterion) # loss_log.log(loss) # prec1_log.log(prec1) # prec5_log.log(prec5) aoares = {} for idx, row in enumerate( zip(valdir_list, val_list_loader, val_list_remap)): # evaluate on validation set print("AOA validation %d/%d" % (idx, len(valdir_list))) prec1_tmp, prec5_tmp, loss_tmp = validate(row[1], model, reglog, criterion, target_remap=[row[2]]) aoares[row[0]['node']] = { 'prec1': float(prec1_tmp), 'prec5': float(prec5_tmp), 'loss': float(loss_tmp), 'aoa': row[0]['aoa'] } # Save to JSON aoaresultsfn = 'aoaresults_toplayer_epoch_%d.json' % ( args.toplayer_epochs - 1) aoapth = os.path.join(tmppth, aoaresultsfn) with open(aoapth, 'w') as f: json.dump(aoares, f) response = s3_client.upload_file( aoapth, args.linearclassbucket, os.path.join(linearclassfn, aoaresultsfn)) os.remove(aoapth) # Clean up temporary directories os.rmdir(exp_log) os.rmdir(tmppth)
class Watcher(object):

    exchanges = {}

    def __init__(self, settings):
        self.set_settings(settings)
        self.L = Logger(settings)
        self.L.log('Setup watcher for %s' % (self.exchange_names), 'info')
        self.load_exchanges(settings)

    def set_settings(self, settings):
        self.trade_threshold = settings['trade_threshold']
        if self.trade_threshold <= 0:
            raise ValueError('settings variable trade_threshold must be above 0!')
        self.exchange_names = settings['exchanges']
        self.poll_interval = settings['poll_interval']

    def load_exchanges(self, settings):
        c_name = '%sExchange'
        modules = zip(self.exchange_names,
                      [__import__('src.exchanges', fromlist=[str(c_name % e)])
                       for e in self.exchange_names])
        exchange_classes = [(e, getattr(module, c_name % e))
                            for e, module in modules]
        for name, klass in exchange_classes:
            self.exchanges[name] = klass(settings)
        self.L.log('Loaded exchanges %s' % self.exchanges, 'info')

    def find_trade(self):
        buys = [(name, exch.buy_price())
                for name, exch in self.exchanges.items()]
        sells = [(name, exch.sell_price())
                 for name, exch in self.exchanges.items()]
        # find the minimum buy and the max sell price
        min_buy = min(buys, key=lambda x: x[1])
        max_sell = max(sells, key=lambda x: x[1])
        if max_sell[1] - min_buy[1] > self.trade_threshold:
            self.L.log('Possible trade opportunity:', 'info')
            self.L.log('Buy from %s @ %s and sell to %s @ %s'
                       % (min_buy + max_sell), 'info')
        else:
            self.L.log('No trading opportunity', 'info')
            self.L.log('Min buy from %s @ %s | Max sell to %s @ %s'
                       % (min_buy + max_sell), 'info')

    def watch(self):
        while True:
            self.find_trade()
            time.sleep(self.poll_interval)
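# Usage sketch (exchange names and values are assumptions for illustration):
# Watcher expects a settings dict with the keys read in set_settings(), and one
# '<Name>Exchange' class per listed name importable from src.exchanges.
# settings = {'trade_threshold': 0.5,            # minimum sell-buy spread
#             'exchanges': ['Bitstamp', 'Kraken'],
#             'poll_interval': 30}               # seconds between polls
# Watcher(settings).watch()                      # loops forever, logging spreads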
def main(): global args args = parser.parse_args() # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) model = models.__dict__[args.arch](sobel=args.sobel) fd = int(model.top_layer.weight.size()[1]) model.top_layer = None model.features = torch.nn.DataParallel(model.features) model.cuda() cudnn.benchmark = True # create optimizer optimizer = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=10**args.wd, ) # define loss function criterion = nn.CrossEntropyLoss().cuda() # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] # remove top_layer parameters from checkpoint for key in checkpoint['state_dict']: if 'top_layer' in key: del checkpoint['state_dict'][key] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) # creating checkpoint repo exp_check = os.path.join(args.exp, 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, 'clusters')) # preprocessing of data tra = [transforms.ToTensor(), transforms.Normalize((0.1307, ), (0.3081, ))] # load the data end = time.time() # MNIST-full begin:------------------------------------------- dataset = datasets.MNIST('./data', train=True, download=True, transform=transforms.Compose(tra)) true_label = dataset.train_labels.cpu().numpy() # MNIST-full end:------------------------------------------- # # FMNIST begin:------------------------------------------- # dataset = datasets.FashionMNIST('./data/fmnist', train=True, download=True, # transform=transforms.Compose(tra)) # true_label = dataset.train_labels.cpu().numpy() # # FMNIST end:------------------------------------------- # # MNIST-test begin:------------------------------------------- # dataset = datasets.MNIST('./data', train=False, download=True, # transform=transforms.Compose(tra)) # true_label = dataset.test_labels.cpu().numpy() # # MNIST-test end:------------------------------------------- # dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra)) # if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end)) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch, num_workers=args.workers, pin_memory=True) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): end = time.time() # remove head model.top_layer = None model.classifier = nn.Sequential( *list(model.classifier.children())[:-1]) # get the features for the whole dataset features = compute_features(dataloader, model, len(dataset)) # cluster the features clustering_loss = deepcluster.cluster(features, verbose=args.verbose) # assign pseudo-labels # train_dataset = clustering.cluster_assign(deepcluster.images_lists, # dataset.train_data) train_dataset = clustering.cluster_assign(deepcluster.images_lists, dataset.train_data) # uniformely sample per target sampler = UnifLabelSampler(int(args.reassign * 
len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, num_workers=args.workers, sampler=sampler, pin_memory=True, ) # set last fully connected layer mlp = list(model.classifier.children()) mlp.append(nn.ReLU(inplace=True).cuda()) model.classifier = nn.Sequential(*mlp) model.top_layer = nn.Linear(fd, len(deepcluster.images_lists)) model.top_layer.weight.data.normal_(0, 0.01) model.top_layer.bias.data.zero_() model.top_layer.cuda() # train network with clusters as pseudo-labels end = time.time() loss = train(train_dataloader, model, criterion, optimizer, epoch) # print log if args.verbose: # print('###### Epoch [{0}] ###### \n' # 'Time: {1:.3f} s\n' # 'Clustering loss: {2:.3f} \n' # 'ConvNet loss: {3:.3f}' # .format(epoch, time.time() - end, clustering_loss, loss)) try: y_pred = clustering.arrange_clustering( deepcluster.images_lists) y_last = clustering.arrange_clustering(cluster_log.data[-1]) import metrics acc = metrics.acc(y_pred, y_last) nmi = metrics.nmi(y_pred, y_last) acc_ = metrics.acc(true_label, y_pred) nmi_ = metrics.nmi(true_label, y_pred) print( 'ACC=%.4f, NMI=%.4f; Relative ACC=%.4f, Relative NMI=%.4f' % (acc_, nmi_, acc, nmi)) except IndexError: pass print('####################### \n') # save running checkpoint torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(args.exp, 'checkpoint.pth.tar')) # save cluster assignments cluster_log.log(deepcluster.images_lists)
def main(): global args args = parser.parse_args() #fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) best_prec1 = 0 # load model model = load_model(args.model) cudnn.benchmark = True # freeze the features layers for block in model.module: try: for param in block.parameters(): param.requires_grad = False except: for layer in block: for param in layer.parameters(): param.requires_grad = False # define loss function (criterion) and optimizer criterion = nn.CrossEntropyLoss().cuda() # data loading code traindir = os.path.join(args.data, 'train') valdir = os.path.join(args.data, 'val_in_folders') valdir_double = os.path.join(args.data, 'val_in_double_folders') valdir_list = [] # Load in AoA table if needed if args.aoaval: aoalist = pd.read_csv('matchingAoA_ImageNet_excel.csv') for index, row in aoalist.iterrows(): node = row['node'] aoa = float(row['aoa']) if not math.isnan(aoa): valdir_list.append({ 'node': node, 'pth': os.path.join(valdir_double, node), 'aoa': aoa }) else: print('Not found %s' % node) #valdir_list=valdir_list[:5] trim for testing print('Using %d validation categories for aoa' % len(valdir_list)) normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Can't do validation if the tencrops option is chosne a if args.tencrops: transformations_val = [ transforms.Resize(256), transforms.TenCrop(224), transforms.Lambda(lambda crops: torch.stack( [normalize(transforms.ToTensor()(crop)) for crop in crops])), ] else: transformations_val = [ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize ] transformations_train = [ transforms.Resize(256), transforms.CenterCrop(256), transforms.RandomCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), normalize ] train_dataset = datasets.ImageFolder( traindir, transform=transforms.Compose(transformations_train)) val_dataset = datasets.ImageFolder( valdir, transform=transforms.Compose(transformations_val)) # Load up individual categories for AoA validation if args.aoaval: val_list_dataset = [] val_list_loader = [] for entry in valdir_list: val_list_dataset.append( datasets.ImageFolder( entry['pth'], transform=transforms.Compose(transformations_val))) val_list_loader.append( torch.utils.data.DataLoader(val_list_dataset[-1], batch_size=50, shuffle=False, num_workers=args.workers)) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True) val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=int(args.batch_size / 2), shuffle=False, num_workers=args.workers) # logistic regression reglog = RegLog(args.conv, len(train_dataset.classes)).cuda() optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, reglog.parameters()), args.lr, momentum=args.momentum, weight_decay=10**args.weight_decay) # create logs exp_log = os.path.join(args.exp, 'log') if not os.path.isdir(exp_log): os.makedirs(exp_log) loss_log = Logger(os.path.join(exp_log, 'loss_log')) prec1_log = Logger(os.path.join(exp_log, 'prec1')) prec5_log = Logger(os.path.join(exp_log, 'prec5')) for epoch in range(args.epochs): end = time.time() # If savedmodel already exists, load this savedmodelpth = os.path.join(args.exp, 'model_best.pth.tar') if os.path.exists(savedmodelpth): print('Loading saved decoder %s' % savedmodelpth) model_with_decoder = torch.load(savedmodelpth) reglog.load_state_dict(model_with_decoder['reglog_state_dict']) else: # train for one epoch 
train(train_loader, model, reglog, criterion, optimizer, epoch) # evaluate on validation set prec1, prec5, loss = validate(val_loader, model, reglog, criterion) loss_log.log(loss) prec1_log.log(prec1) prec5_log.log(prec5) # remember best prec@1 and save checkpoint is_best = prec1 > best_prec1 best_prec1 = max(prec1, best_prec1) if is_best: filename = 'model_best.pth.tar' else: filename = 'checkpoint.pth.tar' torch.save( { 'epoch': epoch + 1, 'arch': 'alexnet', 'state_dict': model.state_dict(), 'reglog_state_dict': reglog.state_dict(), # Also save decoding layers 'prec5': prec5, 'best_prec1': best_prec1, 'optimizer': optimizer.state_dict(), }, savedmodelpth) if args.aoaval: # Validate individual categories, so loss can be compared to AoA # # To check weights loaded OK # # evaluate on validation set # prec1, prec5, loss = validate(val_loader, model, reglog, criterion) # loss_log.log(loss) # prec1_log.log(prec1) # prec5_log.log(prec5) aoares = {} for idx, row in enumerate(zip(valdir_list, val_list_loader)): # evaluate on validation set print("AOA validation %d/%d" % (idx, len(valdir_list))) prec1_tmp, prec5_tmp, loss_tmp = validate( row[1], model, reglog, criterion) aoares[row[0]['node']] = { 'prec1': float(prec1_tmp), 'prec5': float(prec5_tmp), 'loss': float(loss_tmp), 'aoa': row[0]['aoa'] } # Save to JSON aoapth = os.path.join(args.exp, 'aoaresults.json') with open(aoapth, 'w') as f: json.dump(aoares, f)
def main():
    global args
    args = parser.parse_args()

    # fix random seeds
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)

    # CNN
    if args.verbose:
        print('Architecture: {}'.format(args.arch))
    model = models.__dict__[args.arch](sobel=args.sobel)
    fd = int(model.top_layer.weight.size()[1])
    model.top_layer = None
    model.features = torch.nn.DataParallel(model.features)
    model.cuda()
    cudnn.benchmark = True

    # create optimizer
    optimizer = torch.optim.SGD(
        filter(lambda x: x.requires_grad, model.parameters()),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=10**args.wd,
    )

    # define loss function
    criterion = nn.CrossEntropyLoss().cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # remove top_layer parameters from checkpoint
            # (iterate over a copy of the keys so the dict can be mutated)
            for key in list(checkpoint['state_dict']):
                if 'top_layer' in key:
                    del checkpoint['state_dict'][key]
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # creating checkpoint repo
    exp_check = os.path.join(args.exp, 'checkpoints')
    if not os.path.isdir(exp_check):
        os.makedirs(exp_check)

    # creating cluster assignments log
    cluster_log = Logger(os.path.join(args.exp, 'clusters'))

    # preprocessing of data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    tra = [transforms.Resize(256),
           transforms.CenterCrop(224),
           transforms.ToTensor(),
           normalize]

    # load the data
    end = time.time()
    dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra))
    if args.verbose:
        print('Load dataset: {0:.2f} s'.format(time.time() - end))
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # clustering algorithm to use
    deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster)

    # training convnet with DeepCluster
    for epoch in range(args.start_epoch, args.epochs):
        end = time.time()

        # remove head
        model.top_layer = None
        model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])

        # get the features for the whole dataset
        features = compute_features(dataloader, model, len(dataset))

        # cluster the features
        clustering_loss = deepcluster.cluster(features, verbose=args.verbose)

        # assign pseudo-labels
        train_dataset = clustering.cluster_assign(deepcluster.images_lists,
                                                  dataset.imgs)

        # uniformly sample per target
        sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)),
                                   deepcluster.images_lists)

        train_dataloader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch,
            num_workers=args.workers,
            sampler=sampler,
            pin_memory=True,
        )

        # set last fully connected layer
        mlp = list(model.classifier.children())
        mlp.append(nn.ReLU(inplace=True).cuda())
        model.classifier = nn.Sequential(*mlp)
        model.top_layer = nn.Linear(fd, len(deepcluster.images_lists))
        model.top_layer.weight.data.normal_(0, 0.01)
        model.top_layer.bias.data.zero_()
        model.top_layer.cuda()

        # train network with clusters as pseudo-labels
        end = time.time()
        loss = train(train_dataloader, model, criterion, optimizer, epoch)

        # print log
        if args.verbose:
            print('###### Epoch [{0}] ###### \n'
                  'Time: {1:.3f} s\n'
                  'Clustering loss: {2:.3f} \n'
                  'ConvNet loss: {3:.3f}'
                  .format(epoch, time.time() - end, clustering_loss, loss))
            try:
                nmi = normalized_mutual_info_score(
                    clustering.arrange_clustering(deepcluster.images_lists),
                    clustering.arrange_clustering(cluster_log.data[-1])
                )
                print('NMI against previous assignment: {0:.3f}'.format(nmi))
            except IndexError:
                pass
            print('####################### \n')

        # save running checkpoint
        torch.save({'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()},
                   os.path.join(args.exp, 'checkpoint.pth.tar'))

        # save cluster assignments
        cluster_log.log(deepcluster.images_lists)
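# Minimal sketch of the NMI bookkeeping used above (toy labels, purely
# illustrative): normalized_mutual_info_score compares two flat label arrays,
# which is why the nested images_lists are flattened with
# clustering.arrange_clustering before scoring against the previous epoch.
from sklearn.metrics import normalized_mutual_info_score

prev_assignment = [0, 0, 1, 1, 2, 2]   # pseudo-labels from epoch t-1
curr_assignment = [1, 1, 0, 0, 2, 2]   # same partition, relabelled, at epoch t
print(normalized_mutual_info_score(prev_assignment, curr_assignment))  # 1.0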
def main(): global args args = parser.parse_args() # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) logs = [] # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) if args.arch == 'inceptionv1': model = models.__dict__[args.arch](sobel=args.sobel, weight_file='/home/farbod/honours/convert/inception1/kit_pytorch.npy', out=args.num_classes) else: model = models.__dict__[args.arch](sobel=args.sobel, out=args.num_classes) fd = int(model.top_layer.weight.size()[1]) if args.arch == 'inceptionv1' or args.arch == 'mnist': for key in model.modules(): if isinstance(key, nn.Module): continue key = torch.nn.DataParallel(key).cuda() else: model.features = torch.nn.DataParallel(model.features) model.cuda() cudnn.benchmark = True # create optimizer optimizer1 = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=10**args.wd, ) optimizer2 = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=10**args.wd, ) optimizer2 = optimizer1 # define loss function criterion = entropy.EntropyLoss().cuda() #criterion = nn.CrossEntropyLoss().cuda() # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) #args.start_epoch = checkpoint['epoch'] # remove top_layer parameters from checkpoint model.top_layer = None for key in checkpoint['state_dict']: if 'top_layer' in key: del checkpoint['state_dict'][key] model.load_state_dict(checkpoint['state_dict']) model.top_layer = nn.Linear(4096, args.num_classes) model.top_layer.weight.data.normal_(0, 0.01) model.top_layer.bias.data.zero_() model.top_layer = model.top_layer.cuda() #optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})" .format(args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) #for param in model.parameters(): # param.requires_grad = False #for param in model.classifier.parameters(): # param.requires_grad = True #for param in model.top_layer.parameters(): # param.requires_grad = True # creating checkpoint repo exp_check = os.path.join(args.exp, 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) plot_dir = os.path.join(args.exp, 'plots') if not os.path.isdir(plot_dir): os.makedirs(plot_dir) # creating logger logger = Logger(os.path.join(args.exp, 'log')) # preprocessing of data #normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], # std=[0.229, 0.224, 0.225]) normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) tra = [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize] # load the data end = time.time() dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra)) if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end)) loader = torch.utils.data.DataLoader(dataset, batch_size=args.batch, num_workers=args.workers, pin_memory=True, shuffle=True) #sampler = UnifLabelSampler(int(len(dataset)), # last_assignment) #loader = torch.utils.data.DataLoader(dataset, # batch_size=args.batch, # num_workers=args.workers, # pin_memory=True, # sampler=sampler) noshuff_loader = torch.utils.data.DataLoader(dataset, batch_size=args.batch/2, num_workers=args.workers, pin_memory=True, shuffle=False) # get ground truth labels for nmi #num_classes = 
    args.num_classes
    num_classes = args.num_classes
    labels = [[] for i in range(num_classes)]
    for i, (_, label) in enumerate(dataset.imgs):
        labels[label].append(i)
    last_assignment = None

    # training convnet with DeepCluster
    for epoch in range(args.start_epoch, args.epochs):
        end = time.time()
        last_assignment = None
        loss, predicted = train(loader, noshuff_loader, model, criterion,
                                optimizer1, optimizer2, epoch, last_assignment)

        # print log
        if args.verbose:
            print('###### Epoch [{0}] ###### \n'
                  'Time: {1:.3f} s\n'
                  'ConvNet loss: {2:.3f}'
                  .format(epoch, time.time() - end, loss))
        nmi_prev = 0
        nmi_gt = 0
        try:
            nmi_prev = normalized_mutual_info_score(predicted, logger.data[-1])
            print('NMI against previous assignment: {0:.3f}'.format(nmi_prev))
        except IndexError:
            pass
        nmi_gt = normalized_mutual_info_score(
            predicted, clustering.arrange_clustering(labels))
        print('NMI against ground-truth labels: {0:.3f}'.format(nmi_gt))
        print('####################### \n')
        logs.append([epoch, loss, nmi_prev, nmi_gt])

        # save running checkpoint
        if (epoch + 1) % 10 == 0 or epoch == 0:
            torch.save({'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'optimizer1': optimizer1.state_dict(),
                        'optimizer2': optimizer2.state_dict()},
                       os.path.join(args.exp,
                                    'checkpoint_{}.pth.tar'.format(epoch + 1)))

        # save cluster assignments
        logger.log(predicted)
        last_assignment = [[] for i in range(args.num_classes)]
        for i in range(len(predicted)):
            last_assignment[predicted[i]].append(i)
        for i in last_assignment:
            print(len(i))

    scipy.io.savemat(os.path.join(args.exp, 'logs.mat'),
                     {'logs': np.array(logs)})
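# The resume logic above deletes 'top_layer' keys while iterating directly over
# checkpoint['state_dict']; under Python 3 that raises "dictionary changed size
# during iteration". A minimal sketch of a safe variant (iterate over a
# snapshot of the keys; helper name is hypothetical):
def strip_top_layer(state_dict):
    """Remove all 'top_layer' entries from a checkpoint state dict in place."""
    for key in list(state_dict.keys()):
        if 'top_layer' in key:
            del state_dict[key]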
class Cache(MemSysComponent): def __init__(self, sys, clk, user_id, level, num_load_mshrs, num_parallel_stores, cache_size, line_size, latency, logger_on, parent_component_id, child_component_id): super().__init__("L" + str(level) + " Cache " + str(user_id), clk, sys, parent_component_id, child_component_id) self.level = level self.num_load_mshrs = num_load_mshrs self.num_parallel_stores = num_parallel_stores self.load_stall_queue = [] self.store_stall_queue = [] self.load_mshr_bank = MSHRBank(self.num_load_mshrs) self.logger = Logger(self.name, logger_on, self.sys) # Cache Configuration self.tlb_size = 32 self.cache_size = cache_size self.line_size = line_size self.max_size = self.cache_size / self.line_size self.latency = latency self.accesses = [] self.cache = [0, 0] self.load_queue = [] self.store_queue = [] self.offset_bits = int(math.log2(self.line_size)) self.word_size = 64 self.byte_addressable = True def reset(self): self.load_stall_queue = [] self.store_stall_queue = [] self.load_mshr_bank.mshrs = [] self.accesses = [] self.cache = [0, 0] self.load_queue = [] self.store_queue = [] super().reset() def get_cache_line(self, address): return address >> self.offset_bits def peek(self, address): cache_line = self.get_cache_line(address) hit = False for line in self.accesses: if line == cache_line: return True return False def load(self, address): self.logger.log("Load " + str(hex(address))) self.is_idle = False cache_line = self.get_cache_line(address) hit = False for line in self.accesses: if line == cache_line: self.cache[0] += 1 self.accesses.remove(cache_line) self.accesses.insert(0, cache_line) hit = True break if hit: self.logger.log("Hit " + str(hex(address))) self.load_queue.append([address, self.latency]) elif self.load_mshr_bank.isInMSHR(cache_line): self.logger.log("Already waiting on memory access to cache line " + str(hex(cache_line)) + ".") else: self.logger.log("Miss " + str(hex(cache_line))) if self.load_mshr_bank.isMSHRAvailable(): self.load_mshr_bank.write(cache_line) self.lower_load(address) else: self.logger.log("Stall " + str(hex(address))) self.load_stall_queue.append(address) def store(self, address): self.logger.log("Store " + str(hex(address))) self.is_idle = False if len(self.store_queue) < self.num_parallel_stores: self.store_queue.append([address, self.latency]) else: self.store_stall_queue.append(address) def complete_store(self, address): cache_line = self.get_cache_line(address) hit = False for line in self.accesses: if line == cache_line: self.cache[0] += 1 self.accesses.remove(cache_line) self.accesses.insert(0, cache_line) hit = True break if hit: self.logger.log("Write Hit " + str(hex(address))) else: self.logger.log("Write Miss " + str(hex(cache_line))) self.accesses.insert(0, cache_line) if len(self.accesses) > self.max_size: address = self.accesses.pop() self.lower_store( address << int(math.log(self.line_size) / math.log(2))) def complete_load(self, address): self.logger.log("Completing load: " + str(hex(address))) cache_line = self.get_cache_line(address) if self.load_mshr_bank.isInMSHR(cache_line): self.load_mshr_bank.clear(cache_line) for line in self.accesses: if line == cache_line: self.accesses.remove(cache_line) break self.accesses.insert(0, cache_line) if len(self.accesses) > self.max_size: evict_address = self.accesses.pop() self.lower_store(evict_address << int( math.log(self.line_size) / math.log(2))) self.load_queue.append([address, self.latency]) while self.load_mshr_bank.isMSHRAvailable() and len( self.load_stall_queue) > 0: 
self.load(self.load_stall_queue.pop(0)) def advance_load(self, cycles): self.logger.log([(hex(a), c) for (a, c) in self.load_queue]) remove_list = [] for i in range(len(self.load_queue)): self.load_queue[i][1] -= cycles if self.load_queue[i][1] <= 0: for cid in self.parent_component: self.logger.log("Handing over to " + self.sys.hierarchy[cid].name + ".") cache_line = self.load_queue[i][0] >> int( math.log(self.line_size) / math.log(2)) self.return_load(self.load_queue[i][0]) remove_list.append(i) remove_list.reverse() for i in remove_list: self.load_queue.pop(i) def advance_store(self, cycles): remove_list = [] for i in range(len(self.store_queue)): self.store_queue[i][1] -= cycles if self.store_queue[i][1] <= 0: address = int(self.store_queue[i][0]) remove_list.append(i) self.complete_store(address) remove_list.reverse() for i in remove_list: self.store_queue.pop(i) remove_list = [] i = 0 for addr in self.store_stall_queue: if len(self.store_queue) < self.num_parallel_stores: self.store_queue.append([addr, self.latency]) remove_list.append(i) i += 1 remove_list.reverse() for i in remove_list: self.store_stall_queue.pop(i) def advance(self, cycles): self.clk += cycles self.advance_load(cycles) self.advance_store(cycles) if len(self.load_queue) == 0 and \ len(self.load_stall_queue) == 0 and \ len(self.store_queue) == 0 and \ len(self.store_stall_queue) == 0: self.is_idle = True def flush(self): self.logger.log("Flush") for access in self.accesses: self.lower_store(access)
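# The cache above keeps `accesses` ordered most-recently-used first: a hit
# removes the line and re-inserts it at index 0, and an insertion past
# `max_size` evicts from the tail. A minimal sketch of the same LRU policy
# using collections.OrderedDict (hypothetical, independent of the Cache class):
from collections import OrderedDict

class LRUSet:
    def __init__(self, max_size):
        self.max_size = max_size
        self.lines = OrderedDict()

    def touch(self, line):
        """Insert or refresh a cache line; return the evicted line, if any."""
        if line in self.lines:
            self.lines.move_to_end(line, last=False)    # hit: mark most recent
            return None
        self.lines[line] = True
        self.lines.move_to_end(line, last=False)        # miss: insert at the front
        if len(self.lines) > self.max_size:
            evicted, _ = self.lines.popitem(last=True)  # evict least recently used
            return evicted
        return None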
def main(): global args args = parser.parse_args() # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) logs = [] # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) if args.arch == 'inceptionv1': model = models.__dict__[args.arch]( sobel=args.sobel, weight_file='/home/farbod/honours/convert/kit_pytorch.npy') else: model = models.__dict__[args.arch](sobel=args.sobel) fd = int(model.top_layer.weight.size()[1]) model.top_layer = None if args.arch == 'inceptionv1': for key in model.modules(): if isinstance(key, nn.Module): continue key = torch.nn.DataParallel(key).cuda() else: model.features = torch.nn.DataParallel(model.features) model.cuda() cudnn.benchmark = True #for param in model.parameters(): # param.requires_grad = False #for param in model.classifier.parameters(): # param.requires_grad = True # create optimizer optimizer = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=10**args.wd, ) # define loss function criterion = nn.CrossEntropyLoss().cuda() # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) #args.start_epoch = checkpoint['epoch'] # remove top_layer parameters from checkpoint for key in checkpoint['state_dict']: if 'top_layer' in key: del checkpoint['state_dict'][key] model.load_state_dict(checkpoint['state_dict']) #optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) # creating checkpoint repo exp_check = os.path.join(args.exp, 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) plot_dir = os.path.join(args.exp, 'plots') if not os.path.isdir(plot_dir): os.makedirs(plot_dir) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, 'clusters')) # preprocessing of data normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) #normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], # std=[0.5, 0.5, 0.5]) tra = [ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize ] # load the data end = time.time() dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra)) if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end)) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch, num_workers=args.workers, pin_memory=True) # get ground truth labels for nmi num_classes = 65 labels = [[] for i in range(num_classes)] for i, (_, label) in enumerate(dataset.imgs): labels[label].append(i) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): end = time.time() # remove head model.top_layer = None model.classifier = nn.Sequential( *list(model.classifier.children())[:-1]) # get the features for the whole dataset features = compute_features(dataloader, model, len(dataset)) # cluster the features clustering_loss, plot, davg = deepcluster.cluster(features, verbose=args.verbose) print davg if epoch < 20: plot.savefig(os.path.join(plot_dir, 'e{}'.format(epoch))) # assign pseudo-labels train_dataset = clustering.cluster_assign(deepcluster.images_lists, dataset.imgs) #for i, image in 
enumerate(train_dataset): # save_dir = os.path.join('./viz_emb_start', str(image[1])) # if not os.path.isdir(save_dir): # os.makedirs(save_dir) # imn = (image[0].data.cpu().numpy() * 112) + 112 # imn = np.swapaxes(imn, 0, 2) # imn = np.swapaxes(imn, 1, 0) # #print imn.astype('uint8') # #print imn.astype('uint8').shape # im = Image.fromarray(imn.astype('uint8')) # im.save(os.path.join(save_dir, '{}.jpg'.format(i))) # uniformely sample per target sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, num_workers=args.workers, sampler=sampler, pin_memory=True, ) # set last fully connected layer mlp = list(model.classifier.children()) mlp.append(nn.ReLU(inplace=True).cuda()) model.classifier = nn.Sequential(*mlp) model.top_layer = nn.Linear(fd, len(deepcluster.images_lists)) model.top_layer.weight.data.normal_(0, 0.01) model.top_layer.bias.data.zero_() model.top_layer.cuda() # train network with clusters as pseudo-labels end = time.time() loss = train(train_dataloader, model, criterion, optimizer, epoch) # print log if args.verbose: print( '###### Epoch [{0}] ###### \n' 'Time: {1:.3f} s\n' 'Clustering loss: {2:.3f} \n' 'ConvNet loss: {3:.3f}'.format(epoch, time.time() - end, clustering_loss, loss)) nmi_prev = 0 nmi_gt = 0 try: nmi_prev = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.images_lists), clustering.arrange_clustering(cluster_log.data[-1])) print('NMI against previous assignment: {0:.3f}'.format( nmi_prev)) except IndexError: pass nmi_gt = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.images_lists), clustering.arrange_clustering(labels)) print('NMI against ground-truth labels: {0:.3f}'.format(nmi_gt)) print('####################### \n') logs.append([epoch, clustering_loss, loss, nmi_prev, nmi_gt, davg]) # save running checkpoint if (epoch + 1) % 10 == 0 or epoch == 0: torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(args.exp, 'checkpoint_{}.pth.tar'.format(epoch + 1))) # save cluster assignments cluster_log.log(deepcluster.images_lists) scipy.io.savemat(os.path.join(args.exp, 'logs.mat'), {'logs': np.array(logs)})
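# `deepcluster.images_lists` is a list of per-cluster image-index lists, while
# the NMI calls need one label per image. A minimal sketch of the flattening
# performed by clustering.arrange_clustering here (the helper itself is not
# shown, so this is an assumption about its behaviour):
import numpy as np

def flatten_assignment(images_lists):
    """Map each image index to the id of the cluster that contains it."""
    n_images = sum(len(cluster) for cluster in images_lists)
    labels = np.full(n_images, -1, dtype=np.int64)
    for cluster_id, indices in enumerate(images_lists):
        labels[indices] = cluster_id
    return labels

# flatten_assignment([[0, 2], [1, 3]]) -> array([0, 1, 0, 1])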
def main(): global args use_cuda = torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") criterion = nn.CrossEntropyLoss() # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) # CNN if args.verbose: print('Architecture: VGGMiniCBR') model = VGGMiniCBR(num_classes=10) fd = int(model.top_layer.weight.size()[1]) model.top_layer = None model.to(device) cudnn.benchmark = True # create optimizer optimizer = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=10 ** args.wd, ) # optimizer = torch.optim.Adam(filter(lambda x: x.requires_grad, model.parameters())) # creating checkpoint repo exp_check = os.path.join(args.exp, 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) cluster_log = Logger(os.path.join(exp_path, 'clusters')) tra = [ transforms.Grayscale(num_output_channels=1), transforms.RandomAffine(degrees=5, translate=(0.03, 0.03), scale=(0.95, 1.05), shear=5), transforms.ToTensor(), transforms.Normalize((mean_std[use_zca][0],), (mean_std[use_zca][1],)) ] end = time.time() dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra)) if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end)) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch, num_workers=args.workers, pin_memory=True) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): end = time.time() # remove head model.top_layer = None model.classifier = nn.Sequential(*list(model.classifier.children())[:-1]) # ignoring ReLU layer in classifier # get the features for the whole dataset features = compute_features(dataloader, model, len(dataset), device) # ndarray, (60k, 512) [-0.019, 0.016] # cluster the features clustering_loss = deepcluster.cluster(features, verbose=args.verbose) # assign pseudo-labels train_dataset = clustering.cluster_assign(deepcluster.images_lists, dataset.imgs) # uniformely sample per target sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, num_workers=args.workers, sampler=sampler, pin_memory=True, ) # set last fully connected layer mlp = list(model.classifier.children()) mlp.append(nn.ReLU(inplace=True).to(device)) model.classifier = nn.Sequential(*mlp) model.top_layer = nn.Linear(fd, len(deepcluster.images_lists)) model.top_layer.weight.data.normal_(0, 0.01) model.top_layer.bias.data.zero_() model.top_layer.to(device) # train network with clusters as pseudo-labels end = time.time() # loss = train(train_dataloader, model, criterion, optimizer, epoch) loss = train(model, device, train_dataloader, optimizer, epoch, criterion) # print log if args.verbose: print('###### Epoch [{0}] ###### \n' 'Time: {1:.3f} s\n' 'Clustering loss: {2:.3f} \n' 'ConvNet loss: {3:.3f}' .format(epoch, time.time() - end, clustering_loss, loss)) try: nmi = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.images_lists), clustering.arrange_clustering(cluster_log.data[-1]) ) writer.add_scalar('nmi/train', nmi, epoch) print('NMI against previous assignment: {0:.3f}'.format(nmi)) except IndexError: pass print('####################### \n') # save running checkpoint torch.save({'epoch': epoch + 1, 'arch': "VGGMiniCBR", 'state_dict': 
model.state_dict(), 'optimizer': optimizer.state_dict()}, os.path.join(exp_path, 'checkpoint.pth.tar')) # save cluster assignments cluster_log.log(deepcluster.images_lists) torch.save(model.state_dict(), os.path.join(args.exp, "mnist_cnn.pt"))
# model
model = Model(**model_config).to(0)

# optimizer
optimizer = AdamOptimizer(params=model.parameters(),
                          lr=exp_config['lr'],
                          grad_clip_value=exp_config['grad_clip_value'],
                          grad_clip_norm=exp_config['grad_clip_norm'])

# logger
logger_on = True
if logger_on:
    logger = Logger(exp_config, model_config, data_config)

# train / val loop
for epoch in range(exp_config['n_epochs']):
    print('Epoch:', epoch)
    if logger_on:
        logger.log(train(train_data, model, optimizer, eval_length), 'train')
        logger.log(validation(val_data, model, eval_length, use_mean_pred=True),
                   'val')
        logger.save(model)
    else:
        train(train_data, model, optimizer, eval_length)
        validation(val_data, model, eval_length)
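# AdamOptimizer here is a project-specific wrapper (the util module is not
# shown); its grad_clip_value / grad_clip_norm arguments suggest it clips
# gradients before stepping. A minimal sketch of that behaviour with stock
# PyTorch primitives, as an assumption rather than the project's actual code:
import torch

def clipped_step(optimizer, parameters, grad_clip_value=None, grad_clip_norm=None):
    """Apply optional gradient clipping, then one optimizer step."""
    params = [p for p in parameters if p.grad is not None]
    if grad_clip_value is not None:
        torch.nn.utils.clip_grad_value_(params, grad_clip_value)
    if grad_clip_norm is not None:
        torch.nn.utils.clip_grad_norm_(params, grad_clip_norm)
    optimizer.step()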
def wrapper(*args, **kwargs):
    try:
        return method(*args, **kwargs)
    except KeyError as e:
        Logger.log(LogLevel.ERRO, repr(e))
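# A minimal sketch of how a wrapper like the one above is typically produced
# and applied. The decorator name is hypothetical; Logger and LogLevel are the
# project's own logging helpers used in the snippet above.
import functools

def log_key_errors(method):
    @functools.wraps(method)
    def wrapper(*args, **kwargs):
        try:
            return method(*args, **kwargs)
        except KeyError as e:
            Logger.log(LogLevel.ERRO, repr(e))
    return wrapper

# Usage (hypothetical):
# class ObjectStore(...):
#     @log_key_errors
#     def __delitem__(self, name): ...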
def main(): global args args = parser.parse_args() print(args) # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) # load model model = load_model(args.model) model.cuda() cudnn.benchmark = True # freeze the features layers for param in model.features.parameters(): param.requires_grad = False # creating cluster exp if not os.path.isdir(args.exp): os.makedirs(args.exp) print(model) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, 'clusters')) # preprocessing of data normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) tra = [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize] # load the data end = time.time() dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra)) if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end)) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch, num_workers=args.workers, pin_memory=True) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) if os.path.exists(os.path.join(args.exp, 'clusters')): print("=> loading cluster assignments") cluster_assignments = pickle.load(open(os.path.join(args.exp, 'clusters'), 'rb'))[0] else: # cluster the features by computing the pseudo-labels # 1) remove head model.top_layer = None model.classifier = nn.Sequential(*list(model.classifier.children())[:-1]) # 2) get the features for the whole dataset features = compute_features(dataloader, model, len(dataset)) # 3) cluster the features print("clustering the features...") clustering_loss = deepcluster.cluster(features, verbose=args.verbose) # 4) assign pseudo-labels cluster_log.log(deepcluster.images_lists) cluster_assignments = deepcluster.images_lists view_dataset = datasets.ImageFolder(args.data, transform=transforms.Compose([ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor() ])) # cluster_assignments is a list of len(k), each element corresponds to indices for one cluster for c in range(args.nmb_cluster): cluster_indices = cluster_assignments[c] print("cluster {} have {} images".format(c, len(cluster_indices))) c_dataloader = torch.utils.data.DataLoader(view_dataset, batch_size=64, sampler=SubsetRandomSampler(cluster_indices)) for (images, targets) in c_dataloader: print("saving cluster {}".format(c), images.shape) torchvision.utils.save_image(images, os.path.join(args.exp, 'visualize-c{}.png'.format(c))) break # here we want to create a subdir for each cluster inside args.exp # create symbolic link from original all dataset to within the subdir for idx in cluster_indices: print(idx, view_dataset.samples[idx]) break
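# The loop above breaks before materialising the per-cluster sub-directories
# that its closing comments describe. A minimal sketch of the symlinking step
# those comments refer to (the directory layout and helper name are assumptions):
import os

def link_cluster_images(exp_dir, cluster_id, cluster_indices, samples):
    """Symlink every image of one cluster into exp_dir/cluster_<id>/."""
    cluster_dir = os.path.join(exp_dir, 'cluster_{}'.format(cluster_id))
    os.makedirs(cluster_dir, exist_ok=True)
    for idx in cluster_indices:
        src, _ = samples[idx]                     # ImageFolder stores (path, target)
        dst = os.path.join(cluster_dir, os.path.basename(src))
        if not os.path.exists(dst):
            os.symlink(os.path.abspath(src), dst)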
def main(args): # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) run = wandb.init(project='deepcluster4nlp', config=args) # load the data end = time.time() tokenizer = get_tokenizer() dataset = ImdbDataset(True, tokenizer) dataloader = get_dataloader(dataset, tokenizer, args.batch) if args.verbose: print(('Load dataset: {0:.2f} s'.format(time.time() - end))) # cluster_lists = [[i*len(dataset)//args.nmb_cluster + j for j in range(len(dataset)//args.nmb_cluster)] # for i in range(args.nmb_cluster)] # # reassigned_dataset = cluster_assign(cluster_lists, dataset) # # reassigned_dataloader = get_dataloader(reassigned_dataset, tokenizer) # CNN if args.verbose: print(('Architecture: {}'.format(args.arch))) model = textcnn(tokenizer, num_class_features=args.num_class_features) #model = models.__dict__[args.arch](tokenizer) #fd =int(model.top_layer.weight.size()[1]) # replaced by num_class_features model.reset_top_layer() #model.top_layer = None #model.features = torch.nn.DataParallel(model.features, device_ids=[0]) model.to(device) cudnn.benchmark = True # wandb.watch(model) # create optimizer optimizer = torch.optim.AdamW( [x for x in model.parameters() if x.requires_grad], lr=args.lr) #optimizer = torch.optim.SGD( # [x for x in model.parameters() if x.requires_grad], # lr=args.lr, # momentum=args.momentum, # weight_decay=10**args.wd # ) # define loss function criterion = nn.CrossEntropyLoss().to(device) # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print(("=> loading checkpoint '{}'".format(args.resume))) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] # remove top_layer parameters from checkpoint for key in copy.deepcopy(checkpoint['state_dict']): if 'top_layer' in key: del checkpoint['state_dict'][key] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print(("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch']))) else: print(("=> no checkpoint found at '{}'".format(args.resume))) # creating checkpoint repo exp_check = os.path.join(args.exp, 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, 'clusters')) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): end = time.time() # remove head model.reset_top_layer() #top_layer = None # get the features for the whole dataset features = compute_features(dataloader, model, len(dataset)) should_save = False if epoch % 50 == 0 or epoch == args.epochs - 1: should_save = True if should_save: # save the features and dataset wandb_dataset1 = wandb.Artifact(name=f'data', type='dataset') with wandb_dataset1.new_file(f'data_epoch_{epoch}.csv') as f: pd.DataFrame(np.asanyarray([d['text'] for d in dataset.data ])).to_csv(f, sep='\t') run.use_artifact(wandb_dataset1) wandb_dataset2 = wandb.Artifact(name=f'features', type='dataset') with wandb_dataset2.new_file(f'features_epoch_{epoch}.csv') as f: pd.DataFrame(features).to_csv(f, sep='\t') run.use_artifact(wandb_dataset2) pd.DataFrame( np.asanyarray([[d['text'], d['sentiment']] for d in dataset.data ])).to_csv(f'res/data_epoch_{epoch}.tsv', sep='\t', index=None, header=['text', 'sentiment']) pd.DataFrame(features).to_csv(f'res/features_epoch_{epoch}.tsv', sep='\t', index=None, 
header=False) # cluster the features if args.verbose: print('Cluster the features') clustering_loss = deepcluster.cluster(features, verbose=args.verbose) # assign pseudo-labels if args.verbose: print('Assign pseudo labels') # train_dataset = clustering.cluster_assign(deepcluster.cluster_lists, # dataset.data) train_dataset = clustering.cluster_assign(deepcluster.cluster_lists, dataset) # uniformly sample per target # sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)), # deepcluster.cluster_lists) # train_dataloader = torch.utils.data.DataLoader( # train_dataset, # batch_size=args.batch, # num_workers=args.workers, # sampler=sampler, # pin_memory=True, # ) train_dataloader = get_dataloader(train_dataset, tokenizer, args.batch) # set last fully connected layer model.set_top_layer(cluster_list_length=len(deepcluster.cluster_lists)) #model.classifier = nn.Sequential(*mlp) #model.top_layer = nn.Linear(num_class_features,len(deepcluster.cluster_lists) ) #model.top_layer.weight.data.normal_(0, 0.01) #model.top_layer.bias.data.zero_() #model.top_layer.cuda() # train network with clusters as pseudo-labels end = time.time() loss = train(train_dataloader, model, criterion, optimizer, epoch) summary_dict = { 'time': time.time() - end, 'clustering_loss': clustering_loss, 'convnet_loss': loss, 'clusters': len(deepcluster.cluster_lists) } # print log if args.verbose: print(('###### Epoch [{0}] ###### \n' 'Time: {1:.3f} s\n' 'Clustering loss: {2:.3f} \n' 'ConvNet loss: {3:.3f}'.format(epoch, time.time() - end, clustering_loss, loss))) try: nmi = normalized_mutual_info_score( clustering.arrange_clustering(deepcluster.cluster_lists), clustering.arrange_clustering(cluster_log.data[-1])) summary_dict['NMI'] = nmi print(('NMI against previous assignment: {0:.3f}'.format(nmi))) except IndexError: pass print('####################### \n') # wandb log # wandb.log(summary_dict) # save running checkpoint torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(args.exp, 'checkpoint.pth.tar')) if epoch == args.epochs - 1: model_artifact = wandb.Artifact(name=f'model', type='model') model_artifact.add_file( os.path.join(args.exp, 'checkpoint.pth.tar')) run.use_artifact(model_artifact) # save cluster assignments cluster_log.log(deepcluster.cluster_lists)
def train(data_loaders, model, crit, opt): """Training of the CNN. Args: @param data_loaders: (torch.utils.data.DataLoader) Dataloaders dict for train and val phases model (nn.Module): CNN crit (torch.nn): loss opt (torch.optim.SGD): optimizer for every parameters with True requires_grad in model except top layer epoch (int) """ # logger epochs_log = Logger(os.path.join(args.exp, 'epochs300')) val_acc_history = [] best_acc = 0.0 # create an optimizer for the last fc layer optimizer_tl = torch.optim.SGD( model.top_layer.parameters(), lr=args.lr, weight_decay=10**args.wd, ) for epoch in range(args.start_epoch, args.epochs): losses = AverageMeter() epoch_dict = {'train': [], 'val': []} print('\n') print('Epoch {}/{}'.format(epoch + 1, args.epochs)) print('-' * 10) for phase in ['train', 'val']: if phase == 'train': # training mode model.train() else: # evaluate mode model.eval() running_loss = 0.0 running_corrects = 0 for i, sample in enumerate(data_loaders[phase]): input_var = torch.autograd.Variable(sample['image'].cuda()) labels = torch.as_tensor( np.array(sample['label'], dtype='int_')) labels = labels.type(torch.LongTensor).cuda() with torch.set_grad_enabled(phase == 'train'): output = model(input_var) loss = crit(output, labels) _, preds = torch.max(output, 1) if phase == 'train': # compute gradient and do SGD step opt.zero_grad() optimizer_tl.zero_grad() loss.backward() opt.step() optimizer_tl.step() # record loss losses.update(loss.data, input_var.size(0)) running_loss += loss.item() * input_var.size(0) running_corrects += torch.sum(preds == labels.data) if args.verbose and not i % 200: print('Epoch: [{0}][{1}/{2}]\n' 'Running loss:: {loss:.4f} \n' 'Running corrects: ({corrects:.4f}) \n'.format( epoch + 1, i + 1, len(data_loaders[phase]), loss=(loss.item() * input_var.size(0)), corrects=(torch.sum(preds == labels.data)))) epoch_loss = running_loss / len(data_loaders[phase].dataset) epoch_acc = running_corrects.double() / len( data_loaders[phase].dataset) print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc)) if phase == 'val' and epoch_acc > best_acc: best_acc = epoch_acc if phase == 'val': val_acc_history.append([epoch_loss, epoch_acc]) epoch_dict[phase] = [ epoch + 1, epoch_loss, epoch_acc.item(), running_loss, running_corrects.item() ] epochs_log.log(epoch_dict) # save the model torch.save( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer': opt.state_dict() }, os.path.join(args.exp, 'fine_tuning.pth.tar')) return val_acc_history
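# AverageMeter is used above but not defined in this file; the conventional
# implementation from the PyTorch ImageNet example (reproduced here as an
# assumption about the imported helper) is:
class AverageMeter(object):
    """Computes and stores the average and current value."""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count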
def main(): global args args = parser.parse_args() print(args) # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) # load model model = load_model(args.model) model.cuda() cudnn.benchmark = True # freeze the features layers for param in model.features.parameters(): param.requires_grad = False # creating cluster exp if not os.path.isdir(args.exp): os.makedirs(args.exp) print(model) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, 'clusters')) # preprocessing of data normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) tra = [ transforms.Resize(32), transforms.CenterCrop(32), transforms.ToTensor(), normalize ] # load the data end = time.time() # dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra)) dataset = ImageNetDS(DATASET_ROOT + 'downsampled-imagenet-32/', 32, train=True, transform=transforms.Compose(tra)) if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end)) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch, num_workers=args.workers, pin_memory=True) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) if os.path.exists(os.path.join(args.exp, 'clusters')): print("=> loading cluster assignments") cluster_assignments = pickle.load( open(os.path.join(args.exp, 'clusters'), 'rb'))[0] else: # cluster the features by computing the pseudo-labels # 1) remove head model.top_layer = None model.classifier = nn.Sequential( *list(model.classifier.children())[:-1]) # 2) get the features for the whole dataset features = compute_features(dataloader, model, len(dataset)) # 3) cluster the features print("clustering the features...") clustering_loss = deepcluster.cluster(features, verbose=args.verbose, pca=args.pca) # 4) assign pseudo-labels cluster_log.log(deepcluster.images_lists) cluster_assignments = deepcluster.images_lists # view_dataset = datasets.ImageFolder(args.data, transform=transforms.Compose([ # transforms.Resize(256), # transforms.CenterCrop(224), # transforms.ToTensor() # ])) view_dataset = ImageNetDS(DATASET_ROOT + 'downsampled-imagenet-32/', 32, train=True, transform=torchvision.transforms.ToTensor()) cluster_labels = np.ones(len(dataset.train_labels)) * -1 for c in range(args.nmb_cluster): cluster_indices = cluster_assignments[c] cluster_labels[cluster_indices] = c print("cluster {} have {} images".format(c, len(cluster_indices))) c_dataloader = torch.utils.data.DataLoader( view_dataset, batch_size=64, sampler=SubsetRandomSampler(cluster_indices)) for (images, targets) in c_dataloader: print("saving cluster {}".format(c), images.shape) torchvision.utils.save_image( images, os.path.join(args.exp, 'c{}.png'.format(c))) break filename = 'deepcluster-k{}-pca{}-cluster.pickle'.format( args.nmb_cluster, args.pca) save = {'label': cluster_labels} with open(filename, 'wb') as f: pickle.dump(save, f, protocol=pickle.HIGHEST_PROTOCOL) print("saved kmeans deepcluster cluster to {}".format(save))
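# The pickle written above stores a dense per-image label vector under the key
# 'label'. A minimal sketch of reading it back; the filename follows the
# pattern used above, with hypothetical k / pca values:
import pickle
import numpy as np

with open('deepcluster-k100-pca256-cluster.pickle', 'rb') as f:
    saved = pickle.load(f)
cluster_labels = np.asarray(saved['label'])
print(cluster_labels.shape, np.unique(cluster_labels)[:10])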
'loss_D': (loss_D_A + loss_D_B), 'loss_D/full': (loss_D_A_full + loss_D_B_full), 'loss_D/mask': (loss_D_A_mask + loss_D_B_mask), } images_summary = { 'A/real_A': real_A, 'A/recovered_A': recovered_A, 'A/fake_B': fake_B, 'A/same_A': same_A, 'B/real_B': real_B, 'B/recovered_B': recovered_B, 'B/fake_A': fake_A, 'B/same_B': same_B, 'mask': mask, } logger.log(loss_summary, images=images_summary) logger.step() if i % 100 == 0: # Save models checkpoints torch.save(netG_A2B.state_dict(), get_state_path('netG_A2B')) torch.save(netG_B2A.state_dict(), get_state_path('netG_B2A')) torch.save(netD_A.state_dict(), get_state_path('netD_A')) torch.save(netD_B.state_dict(), get_state_path('netD_B')) if opt.use_mask: torch.save(netD_Am.state_dict(), get_state_path('netD_Am')) torch.save(netD_Bm.state_dict(), get_state_path('netD_Bm')) with open(os.path.join(_run_dir, 'state.json'), 'w') as fout: state_json = {**vars(opt), 'epoch': epoch, 'batch': i}
def main(): global args args = parser.parse_args() # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) best_prec1 = 0 # load model model = load_model(args.model) model.cuda() cudnn.benchmark = True # freeze the features layers for param in model.features.parameters(): param.requires_grad = False # define loss function (criterion) and optimizer criterion = nn.CrossEntropyLoss().cuda() # train_dataset, val_dataset = get_downsampled_imagenet_datasets(args) # # train_loader = torch.utils.data.DataLoader(train_dataset, # batch_size=args.batch_size, # shuffle=True, # num_workers=args.workers, # pin_memory=True) # # val_loader = torch.utils.data.DataLoader(val_dataset, # batch_size=args.batch_size, # shuffle=False, # num_workers=args.workers) train_loader, _, val_loader = get_cub200_data_loaders(args) train_loader, _, val_loader = get_pets_data_loaders(args) # logistic regression num_classes = len(np.unique(train_loader.dataset.targets)) print("num_classes", num_classes) reglog = RegLog(args.conv, num_classes).cuda() optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, reglog.parameters()), args.lr, momentum=args.momentum, weight_decay=10**args.weight_decay) # create logs exp_log = os.path.join(args.exp, 'log') if not os.path.isdir(exp_log): os.makedirs(exp_log) loss_log = Logger(os.path.join(exp_log, 'loss_log')) prec1_log = Logger(os.path.join(exp_log, 'prec1')) prec5_log = Logger(os.path.join(exp_log, 'prec5')) for epoch in range(args.epochs): end = time.time() # train for one epoch train(train_loader, model, reglog, criterion, optimizer, epoch) # evaluate on validation set prec1, prec5, loss = validate(val_loader, model, reglog, criterion) print("Validation: Average Loss: {}, Accuracy Prec@1 {}, Prec@5 {}". format(loss, prec1, prec5)) loss_log.log(loss) prec1_log.log(prec1) prec5_log.log(prec5) # remember best prec@1 and save checkpoint is_best = prec1 > best_prec1 best_prec1 = max(prec1, best_prec1) if is_best: filename = 'model_best.pth.tar' else: filename = 'checkpoint.pth.tar' torch.save( { 'epoch': epoch + 1, 'arch': 'alexnet', 'state_dict': model.state_dict(), 'prec5': prec5, 'best_prec1': best_prec1, 'optimizer': optimizer.state_dict(), }, os.path.join(args.exp, filename))
def main(): global args args = parser.parse_args() # fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) # CNN if args.verbose: print('Architecture: {}'.format(args.arch)) model = models.__dict__[args.arch](sobel=args.sobel) fd = int(model.top_layer.weight.size()[1]) model.top_layer = None model.features = torch.nn.DataParallel(model.features) model.cuda() cudnn.benchmark = True # create optimizer optimizer = torch.optim.SGD( filter(lambda x: x.requires_grad, model.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=10**args.wd, ) # define loss function criterion = nn.CrossEntropyLoss().cuda() # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] # remove top_layer parameters from checkpoint for key in checkpoint['state_dict']: if 'top_layer' in key: del checkpoint['state_dict'][key] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) # creating checkpoint repo exp_check = os.path.join(args.exp, 'checkpoints') if not os.path.isdir(exp_check): os.makedirs(exp_check) # creating cluster assignments log cluster_log = Logger(os.path.join(args.exp, 'clusters')) # preprocessing of data normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) tra = [ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize ] # load the data end = time.time() dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra)) if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end)) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch, num_workers=args.workers, pin_memory=True) # clustering algorithm to use deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) # training convnet with DeepCluster for epoch in range(args.start_epoch, args.epochs): end = time.time() # remove head model.top_layer = None model.classifier = nn.Sequential( *list(model.classifier.children())[:-1]) # get the features for the whole dataset features = compute_features(dataloader, model, len(dataset)) # cluster the features clustering_loss = deepcluster.cluster(features, verbose=args.verbose) # assign pseudo-labels train_dataset = clustering.cluster_assign(deepcluster.images_lists, dataset.imgs) # uniformely sample per target sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)), deepcluster.images_lists) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch, num_workers=args.workers, sampler=sampler, pin_memory=True, ) # set last fully connected layer mlp = list(model.classifier.children()) mlp.append(nn.ReLU(inplace=True).cuda()) model.classifier = nn.Sequential(*mlp) model.top_layer = nn.Linear(fd, len(deepcluster.images_lists)) model.top_layer.weight.data.normal_(0, 0.01) model.top_layer.bias.data.zero_() model.top_layer.cuda() # train network with clusters as pseudo-labels end = time.time() loss = train(train_dataloader, model, criterion, optimizer, epoch) # print log if args.verbose: print('###### Epoch [{0}] ###### \n' 'Time: {1:.3f} s\n' 'Clustering loss: {2:.3f} \n' 'ConvNet loss: {3:.3f}'.format(epoch, time.time() - end, clustering_loss, loss)) 
        try:
            nmi = normalized_mutual_info_score(
                clustering.arrange_clustering(deepcluster.images_lists),
                clustering.arrange_clustering(cluster_log.data[-1]))
            print('NMI against previous assignment: {0:.3f}'.format(nmi))
        except IndexError:
            pass
        print('####################### \n')

        # save running checkpoint
        torch.save(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            },
            os.path.join(args.exp, 'checkpoint.pth.tar'))

        # save cluster assignments
        cluster_log.log(deepcluster.images_lists)
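# cluster_log is a DeepCluster-style Logger: cluster_log.data accumulates one
# cluster assignment per epoch and cluster_log.log() pickles the running list
# to disk, which is why cluster_log.data[-1] above is "the previous epoch's
# assignment". A minimal sketch of such a logger (an assumption; the real util
# module is not shown, and the class name here is hypothetical):
import os
import pickle

class ClusterLogger(object):
    def __init__(self, path):
        self.path = path
        self.data = []

    def log(self, assignment):
        self.data.append(assignment)
        with open(self.path, 'wb') as f:
            pickle.dump(self.data, f, protocol=pickle.HIGHEST_PROTOCOL)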
import os

import torch

from config import model_config, data_config, exp_config
from data import load_data
from lib.model import Model
from util import Logger, train, validation, AdamOptimizer

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(exp_config['device'])
torch.cuda.set_device(0)

# data
train_data, val_data = load_data(data_config, exp_config['batch_size'])

# logger
logger = Logger(exp_config, model_config, data_config)

# model
model = Model(**model_config).to(0)

# optimizer
optimizer = AdamOptimizer(params=model.parameters(),
                          lr=exp_config['lr'],
                          grad_clip_value=exp_config['grad_clip_value'],
                          grad_clip_norm=exp_config['grad_clip_norm'])

# train / val loop
for epoch in range(exp_config['n_epochs']):
    print('Epoch:', epoch)
    logger.log(train(train_data, model, optimizer), 'train')
    logger.log(validation(val_data, model), 'val')
    logger.save(model)
def main(): global args args = parser.parse_args() #fix random seeds torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) np.random.seed(args.seed) best_prec1 = 0 # load model if args.l2: model = load_l2_model(args.model) else: model = load_model_resnet(args.model) model.cuda() summary(model, (3, 224, 224)) cudnn.benchmark = True # freeze the features layers for param in model.features.parameters(): param.requires_grad = False # define loss function (criterion) and optimizer criterion = nn.CrossEntropyLoss().cuda() # data loading code traindir = os.path.join(args.data, 'train') valdir = os.path.join(args.data, 'val') normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) if args.tencrops: transformations_val = [ transforms.Resize(256), transforms.TenCrop(224), transforms.Lambda(lambda crops: torch.stack( [normalize(transforms.ToTensor()(crop)) for crop in crops])), ] else: transformations_val = [ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize ] transformations_train = [ transforms.Resize(256), transforms.CenterCrop(256), transforms.RandomCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), normalize ] train_dataset = datasets.ImageFolder( traindir, transform=transforms.Compose(transformations_train)) val_dataset = datasets.ImageFolder( valdir, transform=transforms.Compose(transformations_val)) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True) val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=int(args.batch_size / 2), shuffle=False, num_workers=args.workers) # logistic regression #c = nn.Linear(4*512, 1000).cuda() reglog = RegLog(args.conv, len(train_dataset.classes), args.l2).cuda() optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, reglog.parameters()), args.lr, momentum=args.momentum, weight_decay=10**args.weight_decay) # create logs exp_log = os.path.join(args.exp, 'log') if not os.path.isdir(exp_log): os.makedirs(exp_log) loss_log = Logger(os.path.join(exp_log, 'loss_log')) prec1_log = Logger(os.path.join(exp_log, 'prec1')) prec5_log = Logger(os.path.join(exp_log, 'prec5')) for epoch in range(args.epochs): end = time.time() # train for one epoch train(train_loader, model, reglog, criterion, optimizer, epoch) # evaluate on validation set prec1, prec5, loss = validate(val_loader, model, reglog, criterion) loss_log.log(loss) prec1_log.log(prec1) prec5_log.log(prec5) # remember best prec@1 and save checkpoint is_best = prec1 > best_prec1 best_prec1 = max(prec1, best_prec1) if is_best: filename = 'model_best.pth.tar' else: filename = 'checkpoint.pth.tar' torch.save( { 'epoch': epoch + 1, 'arch': 'alexnet', 'state_dict': model.state_dict(), 'prec5': prec5, 'best_prec1': best_prec1, 'optimizer': optimizer.state_dict(), }, os.path.join(args.exp, filename))
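# With --tencrops the validation transform stacks ten crops per image, so each
# batch arrives as (batch, n_crops, C, H, W). A minimal sketch of the usual way
# such a batch is scored, averaging predictions over the crops; this is an
# assumption about what validate() does, and feature_extractor / classifier are
# placeholder names:
def tencrop_forward(feature_extractor, classifier, inputs):
    bs, ncrops, c, h, w = inputs.size()
    flat = inputs.view(-1, c, h, w)                   # fold the crops into the batch
    logits = classifier(feature_extractor(flat))
    return logits.view(bs, ncrops, -1).mean(dim=1)    # average over the ten crops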
class Memory(MemSysComponent): def __init__(self, sys, latency, max_parallel_loads, max_parallel_stores, tfrs_per_clk, bit_width, clk_speed, logger_on, parent_component_id, child_component_id): super().__init__("Memory", clk_speed, sys, parent_component_id, child_component_id) self.load_mem_queue = [] self.store_mem_queue = [] self.latency = latency self.logger = Logger(self.name, logger_on, self.sys) self.max_parallel_loads = max_parallel_loads self.max_parallel_stores = max_parallel_stores self.tfrs_per_clk = tfrs_per_clk self.bit_width = bit_width self.clk_speed = clk_speed #MHz self.clk = 0 def reset(self): self.load_mem_queue = [] self.store_mem_queue = [] super().reset() def print_bandwidth(self): bandwidth = self.clk_speed * self.tfrs_per_clk * self.bit_width print(str(bandwidth) + " Mbits/s") print(str(bandwidth / self.bit_width) + " MT/s") print(str(bandwidth / 8 / 1000) + " GB/s") def load(self, address): self.logger.log("Load " + str(hex(address))) self.load_mem_queue.append([address, self.latency]) self.is_idle = False def store(self, address): self.logger.log("Store " + str(hex(address))) self.store_mem_queue.append([address, self.latency]) self.is_idle = False def advance_load(self, cycles): self.logger.log("Load " + str([(hex(a), c) for (a, c) in self.load_mem_queue])) remove_list = [] for i in range(self.max_parallel_loads): if i < len(self.load_mem_queue): self.load_mem_queue[i][1] = self.load_mem_queue[i][1] - cycles if self.load_mem_queue[i][1] <= 0: self.return_load(self.load_mem_queue[i][0]) remove_list.append(i) remove_list.reverse() for i in remove_list: self.load_mem_queue.pop(i) def advance_store(self, cycles): self.logger.log("Store " + str(self.store_mem_queue)) remove_list = [] for i in range(self.max_parallel_stores): if i < len(self.store_mem_queue): self.store_mem_queue[i][ 1] = self.store_mem_queue[i][1] - cycles if self.store_mem_queue[i][1] <= 0: self.logger.log("Store " + str(self.store_mem_queue[i][0]) + " completed") remove_list.append(i) remove_list.reverse() for i in remove_list: self.store_mem_queue.pop(i) def advance(self, cycles): self.clk += cycles self.advance_load(cycles) self.advance_store(cycles) if len(self.load_mem_queue) == 0 and len(self.store_mem_queue) == 0: self.is_idle = True
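# Worked example of print_bandwidth() above, with hypothetical DDR-style
# parameters: a 1600 MHz memory clock, 2 transfers per clock and a 64-bit bus.
clk_speed = 1600          # MHz
tfrs_per_clk = 2
bit_width = 64

bandwidth = clk_speed * tfrs_per_clk * bit_width
print(bandwidth)                  # 204800 Mbits/s
print(bandwidth / bit_width)      # 3200.0 MT/s
print(bandwidth / 8 / 1000)       # 25.6 GB/s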