def build_model(self, height, width, num_channels):
    """Construct the TensorFlow graph for this model.

    Records the input dimensions in the configuration, builds the model
    graph inside a fresh ``tf.Graph``, counts trainable parameters, smoke
    tests the graph, optionally wires up Google Colab pushing, and
    persists the final configuration.

    Args:
        height: Input image height in pixels.
        width: Input image width in pixels.
        num_channels: Number of input channels (e.g. 1 for grayscale).
    """
    self.config['height'] = height
    self.config['width'] = width
    self.config['num_channels'] = num_channels

    self.graph = tf.Graph()
    with self.graph.as_default():
        # Factory instantiates the concrete network from the raw config dict.
        self.model_graph = Factory(self.config.__dict__)
        print(self.model_graph)
        # Total scalar weight count over every trainable variable in the graph.
        self.trainable_count = np.sum([
            np.prod(v.get_shape().as_list())
            for v in tf.trainable_variables()
        ])
        # Fixed typo: 'paramters' -> 'parameters'.
        print('\nNumber of trainable parameters', self.trainable_count)
    self.test_graph()

    # ------------------------------------------------------------------
    #                          GOOGLE COLAB
    # ------------------------------------------------------------------
    if self.config.colab:
        self.push_colab()
    # Expose the push hook on the config so callers can trigger it later.
    self.config.push_colab = self.push_colab
    self.isBuild = True
    utils.save_args(self.config.dict(), self.config.model_name,
                    self.config.summary_dir)
def setup_logging(self):
    """Resolve the experiment output directories, collect the boolean run
    flags, create the directories on disk, and persist the configuration.
    """
    self.experiments_root_dir = 'experiments'
    self.config.model_name = const.get_model_name(self.config.model_name,
                                                  self.config)

    # Every output directory follows experiments/<kind>/<model_name>.
    def _run_dir(kind):
        return os.path.join(
            self.experiments_root_dir + "/" + kind + "/",
            self.config.model_name)

    self.config.summary_dir = _run_dir(self.config.log_dir)
    self.config.checkpoint_dir = _run_dir(self.config.checkpoint_dir)
    self.config.results_dir = _run_dir(self.config.result_dir)

    # Boolean run-mode flags lifted out of the main configuration.
    flag_names = [
        'train', 'restore', 'plot', 'clustering', 'early_stopping', 'colab'
    ]
    self.flags = utils.Config(
        {name: self.config.__dict__[name] for name in flag_names})

    # Make sure all experiment directories exist, then save the config.
    utils.create_dirs([
        self.config.summary_dir, self.config.checkpoint_dir,
        self.config.results_dir
    ])
    utils.save_args(self.config.__dict__, self.config.summary_dir)
def save(self):
    """Persist the current session checkpoint and record its location.

    Does nothing when no saver has been configured.
    """
    if not self._saver:
        return
    model_key = self._get_model_name()
    checkpoint_target = os.path.join(
        sys.path[0], 'saved_models/' + self._args['model_name'])
    checkpoint_prefix = self._saver.save(self.sess, checkpoint_target)
    # save the model name to models.yaml
    utils.save_args({model_key: checkpoint_prefix}, filename='models.yaml')
def __init__(self, **kwrds):
    """Initialize the model wrapper from keyword overrides.

    Starts from a deep copy of the default configuration in ``const.config``,
    applies the caller's keyword overrides, sets up the experiment directory
    tree, tries to restore a previously saved configuration, and — when the
    saved config already carries input dimensions — attempts to rebuild the
    model graph for restoring.

    Args:
        **kwrds: Configuration overrides; every key must already exist in
            the default configuration, otherwise an AssertionError is raised.
    """
    self.config = utils.Config(copy.deepcopy(const.config))
    for key, value in kwrds.items():
        assert key in self.config.keys(), \
            '{} is not a keyword, \n acceptable keywords: {}'.format(
                key, self.config.keys())
        self.config[key] = value

    self.experiments_root_dir = 'experiments'
    utils.create_dirs([self.experiments_root_dir])
    self.config.model_name = const.get_model_name(self.config.model_type,
                                                  self.config)
    # Per-run directories live under experiments/<kind>/<model_name>.
    self.config.checkpoint_dir = os.path.join(
        self.experiments_root_dir + "/" + self.config.checkpoint_dir + "/",
        self.config.model_name)
    self.config.summary_dir = os.path.join(
        self.experiments_root_dir + "/" + self.config.summary_dir + "/",
        self.config.model_name)
    self.config.log_dir = os.path.join(
        self.experiments_root_dir + "/" + self.config.log_dir + "/",
        self.config.model_name)
    utils.create_dirs([
        self.config.checkpoint_dir, self.config.summary_dir,
        self.config.log_dir
    ])

    # Best-effort restore of a previously saved configuration; a fresh run
    # simply has nothing to load. Narrowed from a bare `except:` so that
    # SystemExit/KeyboardInterrupt are not swallowed.
    load_config = {}
    try:
        load_config = utils.load_args(self.config.model_name,
                                      self.config.summary_dir)
        self.config.update(load_config)
        # These entries hold callables/initializers that cannot round-trip
        # through the saved args, so refresh them from the defaults.
        self.config.update({
            key: const.config[key]
            for key in ['kinit', 'bias_init', 'act_out', 'transfer_fct']
        })
        print('Loading previous configuration ...')
    except Exception:
        print('Unable to load previous configuration ...')

    utils.save_args(self.config.dict(), self.config.model_name,
                    self.config.summary_dir)

    if self.config.plot:
        # Accumulators for plot artifacts produced during training.
        self.latent_space_files = list()
        self.latent_space3d_files = list()
        self.recons_files = list()

    if hasattr(self.config, 'height'):
        # A saved config with input dimensions means a previous run exists;
        # try to rebuild the graph so it can be restored. Narrowed from a
        # bare `except:`; any build failure just leaves the model unbuilt.
        try:
            self.config.restore = True
            self.build_model(self.config.height, self.config.width,
                             self.config.num_channels)
        except Exception:
            self.isBuild = False
    else:
        self.isBuild = False
type=float, default=0.8, help='probability bounding factor') parser.add_argument('--beta', type=float, default=0.1, help='Coarse detector increment') parser.add_argument('--sigma', type=float, default=0.5, help='cost for patch use') args = parser.parse_args() if not os.path.exists(args.cv_dir): os.makedirs(args.cv_dir) utils.save_args(__file__, args) def train(epoch): agent.train() rewards, rewards_baseline, policies = [], [], [] for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(trainloader), total=len(trainloader)): inputs = Variable(inputs) if not args.parallel: inputs = inputs.cuda() # Actions by the Agent probs = F.sigmoid(agent.forward(inputs)) alpha_hp = np.clip(args.alpha + epoch * 0.001, 0.6, 0.95) probs = probs * alpha_hp + (1 - alpha_hp) * (1 - probs)
import glob import pdb import math import collections import tensorboardX as tbx from utils import utils import torch.backends.cudnn as cudnn from flags import parser args = parser.parse_args() os.makedirs(args.cv_dir + '/' + args.name, exist_ok=True) utils.save_args(args) def test(epoch): model.eval() accuracies = [] all_attr_lab = [] all_obj_lab = [] all_pred = [] pairs = valloader.dataset.pairs objs = valloader.dataset.objs attrs = valloader.dataset.attrs if args.test_set == 'test': val_pairs = valloader.dataset.test_pairs
''' ------------------------------------------------------------------------------ GET ARGUMENTS ------------------------------------------------------------------------------ ''' # capture the config path from the run arguments # then process the json configuration file args = get_args() if (args.model_type != const.GMVAE and args.model_type != const.GMVAECNN): print('Choose a valid model_type!') sys.exit() config, flags = get_config_and_flags(args) # create the experiments dirs utils.create_dirs( [config.summary_dir, config.checkpoint_dir, config.results_dir]) utils.save_args(args, config.summary_dir) ''' ------------------------------------------------------------------------------ GET DATA ------------------------------------------------------------------------------ ''' print('\n Loading data...') data_train, data_valid, data_test = utils.load_data(config.dataset_name) ''' ------------------------------------------------------------------------------ GET NETWORK PARAMS ------------------------------------------------------------------------------ ''' network_params = Bunch() network_params.input_height = data_train.height network_params.input_width = data_train.width network_params.input_nchannels = data_train.num_channels network_params.hidden_dim = config.hidden_dim network_params.z_dim = config.z_dim
def main():
    """Train (and periodically evaluate) a compositional zero-shot model.

    Parses arguments, builds the train/test datasets and loaders, configures
    the model and optimizer, optionally restores a checkpoint, then runs the
    epoch loop with TensorBoard logging.
    """
    # Get arguments and start logging
    args = parser.parse_args()
    # Merge values from the YAML/JSON config file into the parsed args.
    load_args(args.config, args)
    logpath = os.path.join(args.cv_dir, args.name)
    os.makedirs(logpath, exist_ok=True)
    save_args(args, logpath, args.config)
    writer = SummaryWriter(log_dir = logpath, flush_secs = 30)

    # Get dataset
    trainset = dset.CompositionDataset(
        root=os.path.join(DATA_FOLDER,args.data_dir),
        phase='train',
        split=args.splitname,
        model =args.image_extractor,
        num_negs=args.num_negs,
        pair_dropout=args.pair_dropout,
        update_features = args.update_features,
        train_only= args.train_only,
        open_world=args.open_world
    )
    trainloader = torch.utils.data.DataLoader(
        trainset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers)
    # Evaluation split: either the validation or the test pairs,
    # selected by args.test_set.
    testset = dset.CompositionDataset(
        root=os.path.join(DATA_FOLDER,args.data_dir),
        phase=args.test_set,
        split=args.splitname,
        model =args.image_extractor,
        subset=args.subset,
        update_features = args.update_features,
        open_world=args.open_world
    )
    testloader = torch.utils.data.DataLoader(
        testset,
        batch_size=args.test_batch_size,
        shuffle=False,
        num_workers=args.workers)

    # Get model and optimizer
    image_extractor, model, optimizer = configure_model(args, trainset)
    args.extractor = image_extractor

    train = train_normal
    evaluator_val = Evaluator(testset, model)
    print(model)

    start_epoch = 0
    # Load checkpoint
    if args.load is not None:
        checkpoint = torch.load(args.load)
        if image_extractor:
            # Older checkpoints may lack extractor weights; tolerate that.
            try:
                image_extractor.load_state_dict(checkpoint['image_extractor'])
                if args.freeze_features:
                    print('Freezing image extractor')
                    image_extractor.eval()
                    for param in image_extractor.parameters():
                        param.requires_grad = False
            except:
                print('No Image extractor in checkpoint')
        model.load_state_dict(checkpoint['net'])
        start_epoch = checkpoint['epoch']
        print('Loaded model from ', args.load)

    for epoch in tqdm(range(start_epoch, args.max_epochs + 1),
                      desc = 'Current epoch'):
        train(epoch, image_extractor, model, trainloader, optimizer, writer)
        # CompCos periodically refreshes its feasibility scores in the
        # open-world setting.
        if model.is_open and args.model=='compcos' and \
                ((epoch+1)%args.update_feasibility_every)==0 :
            print('Updating feasibility scores')
            model.update_feasibility(epoch+1.)

        if epoch % args.eval_val_every == 0:
            with torch.no_grad(): # todo: might not be needed
                test(epoch, image_extractor, model, testloader, evaluator_val,
                     writer, args, logpath)
    # NOTE(review): best_auc / best_hm appear to be globals updated inside
    # test() — confirm they are defined before this point.
    print('Best AUC achieved is ', best_auc)
    print('Best HM achieved is ', best_hm)
model.train() return scores if __name__ == '__main__': args = get_args() if args.gpu_id < 0: setattr(args, "cuda", False) else: setattr(args, "cuda", True) random.seed(args.seed) np.random.seed(args.seed) torch.random.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed_all(args.seed) if args.cfg_path is not None: logger.info( f"[*] Note: cfg path is not None, read args from config - {args.cfg_path}" ) args = cfg_parser(args.cfg_path, args) os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id) prepare_dirs(args) save_args(args) main(args)
# 尝试分布式训练 # local_rank = torch.distributed.get_rank() # local_rank表示本台机器上的进程序号,是由torch.distributed.launch自动分配和传入的。 local_rank = args.local_rank # 根据local_rank来设定当前使用哪块GPU torch.cuda.set_device(local_rank) device = torch.device("cuda", local_rank) # 初始化DDP,使用默认backend(nccl)就行 torch.distributed.init_process_group(backend="nccl") print("args.local_rank={}".format(args.local_rank)) else: device = torch.device("cuda") # 尝试分布式训练 local_master = True if not args.distributed else args.local_rank == 0 utils.save_args(args) if local_master else None # 打印所用的参数 if local_master: logger.info('[Info] used parameters: {}'.format(vars(args))) torch.backends.cudnn.benchmark = True # https://blog.csdn.net/byron123456sfsfsfa/article/details/96003317 utils.check_path(args.checkpoint_dir) utils.save_args(args) if local_master else None filename = 'command_test.txt' if args.mode == 'test' else 'command_train.txt' utils.save_command(args.checkpoint_dir, filename) if local_master else None def main():