def test_one_shot_model(ckpath, use_train):
    """Load a one-shot NAS search checkpoint and evaluate it on the CIFAR-10 validation split.

    Args:
        ckpath: path to a search checkpoint (.pth) holding 'args' and 'search_model'.
        use_train: int-like flag; truthy values forward use_train=True to evaluate_one_shot.

    Raises:
        ValueError: if the checkpoint's dataset is not cifar10.
    """
    from models import get_cell_based_tiny_net, get_search_spaces
    from datasets import get_datasets, SearchDataset
    from config_utils import load_config, dict2config
    from utils.nas_utils import evaluate_one_shot
    use_train = int(use_train) > 0
    print('ckpath : {:}'.format(ckpath))
    ckp = torch.load(ckpath)
    xargs = ckp['args']
    train_data, valid_data, xshape, class_num = get_datasets(xargs.dataset, xargs.data_path, -1)
    config = load_config('./configs/nas-benchmark/algos/DARTS.config', {'class_num': class_num, 'xshape': xshape}, None)
    if xargs.dataset == 'cifar10':
        cifar_split = load_config('configs/nas-benchmark/cifar-split.txt', None, None)
        # Use the training images with the validation transform for held-out evaluation.
        xvalid_data = deepcopy(train_data)
        xvalid_data.transform = valid_data.transform
        valid_loader = torch.utils.data.DataLoader(xvalid_data, batch_size=2048, sampler=torch.utils.data.sampler.SubsetRandomSampler(cifar_split.valid), num_workers=12, pin_memory=True)
    else:
        # BUG FIX: was `xargs.dataseet` (typo), which raised AttributeError instead of
        # the intended ValueError with the offending dataset name.
        raise ValueError('invalid dataset : {:}'.format(xargs.dataset))
    search_space = get_search_spaces('cell', xargs.search_space_name)
    model_config = dict2config({'name': 'SETN', 'C': xargs.channel, 'N': xargs.num_cells, 'max_nodes': xargs.max_nodes, 'num_classes': class_num, 'space': search_space, 'affine': False, 'track_running_stats': True}, None)
    search_model = get_cell_based_tiny_net(model_config)
    search_model.load_state_dict(ckp['search_model'])
    search_model = search_model.cuda()
    api = API('/home/dxy/.torch/NAS-Bench-201-v1_0-e61699.pth')
    archs, probs, accuracies = evaluate_one_shot(search_model, valid_loader, api, use_train)
def setup():
    """Load the global application settings and the active keymap into module globals."""
    global app, keymap
    log.debug("Loading global app settings...")
    app = config_utils.load_config(
        paths.config_path(MAINFILE), paths.app_path(MAINSPEC))
    log.debug("Loading keymap...")
    keymap_spec = "keymaps/" + app['app-settings']['load_keymap']
    keymap = config_utils.load_config(
        paths.config_path("keymap.keymap"), paths.app_path(keymap_spec))
def test_one_shot_model(ckpath, use_train):
    """Load a one-shot NAS search checkpoint and evaluate it on the CIFAR-10 validation split.

    Args:
        ckpath: path to a search checkpoint (.pth) holding 'args' and 'search_model'.
        use_train: int-like flag; truthy values forward use_train=True to evaluate_one_shot.

    Raises:
        ValueError: if the checkpoint's dataset is not cifar10.
    """
    from models import get_cell_based_tiny_net, get_search_spaces
    from datasets import get_datasets, SearchDataset
    from config_utils import load_config, dict2config
    from utils.nas_utils import evaluate_one_shot

    use_train = int(use_train) > 0
    print("ckpath : {:}".format(ckpath))
    ckp = torch.load(ckpath)
    xargs = ckp["args"]
    train_data, valid_data, xshape, class_num = get_datasets(
        xargs.dataset, xargs.data_path, -1)
    config = load_config(
        "./configs/nas-benchmark/algos/DARTS.config",
        {
            "class_num": class_num,
            "xshape": xshape
        },
        None,
    )
    if xargs.dataset == "cifar10":
        cifar_split = load_config("configs/nas-benchmark/cifar-split.txt", None, None)
        # Use the training images with the validation transform for held-out evaluation.
        xvalid_data = deepcopy(train_data)
        xvalid_data.transform = valid_data.transform
        valid_loader = torch.utils.data.DataLoader(
            xvalid_data,
            batch_size=2048,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                cifar_split.valid),
            num_workers=12,
            pin_memory=True,
        )
    else:
        # BUG FIX: was `xargs.dataseet` (typo), which raised AttributeError instead of
        # the intended ValueError with the offending dataset name.
        raise ValueError("invalid dataset : {:}".format(xargs.dataset))
    search_space = get_search_spaces("cell", xargs.search_space_name)
    model_config = dict2config(
        {
            "name": "SETN",
            "C": xargs.channel,
            "N": xargs.num_cells,
            "max_nodes": xargs.max_nodes,
            "num_classes": class_num,
            "space": search_space,
            "affine": False,
            "track_running_stats": True,
        },
        None,
    )
    search_model = get_cell_based_tiny_net(model_config)
    search_model.load_state_dict(ckp["search_model"])
    search_model = search_model.cuda()
    api = API("/home/dxy/.torch/NAS-Bench-201-v1_0-e61699.pth")
    archs, probs, accuracies = evaluate_one_shot(search_model, valid_loader,
                                                 api, use_train)
def main(xargs, nas_bench):
    """Run regularized-evolution (R-EA) architecture search on NAS-Bench-201.

    Builds CIFAR data loaders when a data path is given (otherwise searches
    purely via the benchmark API), evolves architectures within the given time
    budget, and queries the benchmark for the best one found.

    Returns (logger.log_dir, benchmark index of the best architecture).
    """
    assert torch.cuda.is_available(), 'CUDA is not available.'
    # Deterministic cuDNN settings for reproducible search runs.
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads( xargs.workers )
    prepare_seed(xargs.rand_seed)
    # NOTE(review): uses the module-level `args`, not `xargs` — presumably the
    # same namespace object; confirm before refactoring.
    logger = prepare_logger(args)

    # cifar10 results in the benchmark are keyed under 'cifar10-valid'.
    if xargs.dataset == 'cifar10':
        dataname = 'cifar10-valid'
    else:
        dataname = xargs.dataset
    if xargs.data_path is not None:
        train_data, valid_data, xshape, class_num = get_datasets(xargs.dataset, xargs.data_path, -1)
        split_Fpath = 'configs/nas-benchmark/cifar-split.txt'
        cifar_split = load_config(split_Fpath, None, None)
        train_split, valid_split = cifar_split.train, cifar_split.valid
        logger.log('Load split file from {:}'.format(split_Fpath))
        config_path = 'configs/nas-benchmark/algos/R-EA.config'
        config = load_config(config_path, {'class_num': class_num, 'xshape': xshape}, logger)
        # To split data: evaluate on training images under the validation transform.
        train_data_v2 = deepcopy(train_data)
        train_data_v2.transform = valid_data.transform
        valid_data = train_data_v2
        search_data = SearchDataset(xargs.dataset, train_data, train_split, valid_split)
        # data loader
        train_loader = torch.utils.data.DataLoader(train_data, batch_size=config.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split), num_workers=xargs.workers, pin_memory=True)
        valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=config.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split), num_workers=xargs.workers, pin_memory=True)
        logger.log('||||||| {:10s} ||||||| Train-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}'.format(xargs.dataset, len(train_loader), len(valid_loader), config.batch_size))
        logger.log('||||||| {:10s} ||||||| Config={:}'.format(xargs.dataset, config))
        extra_info = {'config': config, 'train_loader': train_loader, 'valid_loader': valid_loader}
    else:
        # No data path: search relies solely on the benchmark's recorded results.
        config_path = 'configs/nas-benchmark/algos/R-EA.config'
        config = load_config(config_path, None, logger)
        logger.log('||||||| {:10s} ||||||| Config={:}'.format(xargs.dataset, config))
        extra_info = {'config': config, 'train_loader': None, 'valid_loader': None}

    search_space = get_search_spaces('cell', xargs.search_space_name)
    random_arch = random_architecture_func(xargs.max_nodes, search_space)
    mutate_arch = mutate_arch_func(search_space)
    #x =random_arch() ; y = mutate_arch(x)
    x_start_time = time.time()
    logger.log('{:} use nas_bench : {:}'.format(time_string(), nas_bench))
    logger.log('-'*30 + ' start searching with the time budget of {:} s'.format(xargs.time_budget))
    # NOTE(review): `args.ea_fast_by_api` also reads the module-level args — confirm.
    history, total_cost = regularized_evolution(xargs.ea_cycles, xargs.ea_population, xargs.ea_sample_size, xargs.time_budget, random_arch, mutate_arch, nas_bench if args.ea_fast_by_api else None, extra_info, dataname)
    logger.log('{:} regularized_evolution finish with history of {:} arch with {:.1f} s (real-cost={:.2f} s).'.format(time_string(), len(history), total_cost, time.time()-x_start_time))
    # Pick the individual with the highest recorded accuracy.
    best_arch = max(history, key=lambda i: i.accuracy)
    best_arch = best_arch.arch
    logger.log('{:} best arch is {:}'.format(time_string(), best_arch))

    info = nas_bench.query_by_arch( best_arch )
    if info is None:
        logger.log('Did not find this architecture : {:}.'.format(best_arch))
    else:
        logger.log('{:}'.format(info))
    logger.log('-'*100)
    logger.close()
    return logger.log_dir, nas_bench.query_index_by_arch( best_arch )
def setup():
    """Initialise the module-level `app` settings and `keymap` configuration."""
    log.debug("Loading global app settings...")
    global app
    main_cfg = paths.config_path(MAINFILE)
    main_spec = paths.app_path(MAINSPEC)
    app = config_utils.load_config(main_cfg, main_spec)
    log.debug("Loading keymap...")
    global keymap
    keymap = config_utils.load_config(
        paths.config_path("keymap.keymap"),
        paths.app_path("keymaps/" + app['app-settings']['load_keymap']))
def main(xargs, nas_bench):
    """Random architecture search on NAS-Bench-201 within a wall-clock time budget.

    Samples random architectures, evaluates each via train_and_eval, and keeps
    the most accurate one seen before the budget is exhausted.

    Returns (logger.log_dir, benchmark index of the best architecture).
    """
    assert torch.cuda.is_available(), 'CUDA is not available.'
    # Deterministic cuDNN settings for reproducible search runs.
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads( xargs.workers )
    prepare_seed(xargs.rand_seed)
    # NOTE(review): uses the module-level `args`, not `xargs` — presumably the
    # same namespace object; confirm before refactoring.
    logger = prepare_logger(args)

    assert xargs.dataset == 'cifar10', 'currently only support CIFAR-10'
    train_data, valid_data, xshape, class_num = get_datasets(xargs.dataset, xargs.data_path, -1)
    split_Fpath = 'configs/nas-benchmark/cifar-split.txt'
    cifar_split = load_config(split_Fpath, None, None)
    train_split, valid_split = cifar_split.train, cifar_split.valid
    logger.log('Load split file from {:}'.format(split_Fpath))
    config_path = 'configs/nas-benchmark/algos/R-EA.config'
    config = load_config(config_path, {'class_num': class_num, 'xshape': xshape}, logger)
    # To split data: evaluate on training images under the validation transform.
    train_data_v2 = deepcopy(train_data)
    train_data_v2.transform = valid_data.transform
    valid_data = train_data_v2
    search_data = SearchDataset(xargs.dataset, train_data, train_split, valid_split)
    # data loader
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=config.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split), num_workers=xargs.workers, pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=config.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split), num_workers=xargs.workers, pin_memory=True)
    logger.log('||||||| {:10s} ||||||| Train-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}'.format(xargs.dataset, len(train_loader), len(valid_loader), config.batch_size))
    logger.log('||||||| {:10s} ||||||| Config={:}'.format(xargs.dataset, config))
    extra_info = {'config': config, 'train_loader': train_loader, 'valid_loader': valid_loader}

    search_space = get_search_spaces('cell', xargs.search_space_name)
    random_arch = random_architecture_func(xargs.max_nodes, search_space)
    #x =random_arch() ; y = mutate_arch(x)
    logger.log('{:} use nas_bench : {:}'.format(time_string(), nas_bench))
    best_arch, best_acc, total_time_cost, history = None, -1, 0, []
    #for idx in range(xargs.random_num):
    while total_time_cost < xargs.time_budget:
        arch = random_arch()
        accuracy, cost_time = train_and_eval(arch, nas_bench, extra_info)
        if total_time_cost + cost_time > xargs.time_budget:
            # Counting this evaluation would exceed the budget — stop here.
            break
        else:
            total_time_cost += cost_time
        history.append(arch)
        if best_arch is None or best_acc < accuracy:
            best_acc, best_arch = accuracy, arch
        logger.log('[{:03d}] : {:} : accuracy = {:.2f}%'.format(len(history), arch, accuracy))
    logger.log('{:} best arch is {:}, accuracy = {:.2f}%, visit {:} archs with {:.1f} s.'.format(time_string(), best_arch, best_acc, len(history), total_time_cost))

    info = nas_bench.query_by_arch( best_arch )
    if info is None:
        logger.log('Did not find this architecture : {:}.'.format(best_arch))
    else:
        logger.log('{:}'.format(info))
    logger.log('-'*100)
    logger.close()
    return logger.log_dir, nas_bench.query_index_by_arch( best_arch )
def main(bam_file, config_file=None, chrom='all', start=0, end=None,
         outfile=None, normalize=False, use_tempfile=False):
    """Convert a BAM file to a bigwig coverage track.

    Writes an intermediate wiggle track for the requested region, then converts
    it with the UCSC wigToBigWig program. Skips work if `outfile` already
    exists and is non-empty.

    Args:
        bam_file: input BAM path; output defaults to the same basename + '.bigwig'.
        config_file: optional config with the wigToBigWig program location.
        chrom/start/end: region selection; start/end may arrive as strings from a CLI.
        normalize: forwarded to write_bam_track.
        use_tempfile: write the wig to a temp file instead of next to `outfile`.
    """
    if config_file:
        config = load_config(config_file)
    else:
        config = {"program": {"ucsc_bigwig": "wigToBigWig"}}
    if outfile is None:
        outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
    # BUG FIX: `start` may be a string when invoked from the command line; the
    # old `if start > 0` then compared str to int (TypeError on Python 3).
    # Convert first, then shift (presumably 1-based inclusive -> 0-based; confirm).
    start = int(start)
    if start > 0:
        start -= 1
    if end is not None:
        end = int(end)
    regions = [(chrom, start, end)]
    if os.path.abspath(bam_file) == os.path.abspath(outfile):
        sys.stderr.write("Bad arguments, input and output files are the same.\n")
        sys.exit(1)
    if not (os.path.exists(outfile) and os.path.getsize(outfile) > 0):
        if use_tempfile:
            # Use a temp file to avoid any possiblity of not having write permission
            out_handle = tempfile.NamedTemporaryFile(delete=False)
            wig_file = out_handle.name
        else:
            wig_file = "%s.wig" % os.path.splitext(outfile)[0]
            out_handle = open(wig_file, "w")
        with closing(out_handle):
            chr_sizes, wig_valid = write_bam_track(bam_file, regions, config, out_handle, normalize)
        try:
            if wig_valid:
                convert_to_bigwig(wig_file, chr_sizes, config, outfile)
        finally:
            # Always clean up the intermediate wig, even if conversion fails.
            os.remove(wig_file)
def fill_list(self):
    """Scan the config directory for sessions, list the valid ones in the view,
    and delete malformed session directories (missing twitter credentials)."""
    sessionsList = []
    log.debug("Filling the sessions list.")
    self.sessions = []
    for i in os.listdir(paths.config_path()):
        if os.path.isdir(paths.config_path(i)):
            log.debug("Adding session %s" % (i, ))
            strconfig = "%s/session.conf" % (paths.config_path(i))
            config_test = config_utils.load_config(strconfig)
            name = config_test["twitter"]["user_name"]
            if config_test["twitter"]["user_key"] != "" and config_test[
                    "twitter"]["user_secret"] != "":
                sessionsList.append(name)
                self.sessions.append(i)
            else:
                try:
                    log.debug("Deleting session %s" % (i, ))
                    shutil.rmtree(paths.config_path(i))
                # BUG FIX: was a bare `except:` whose handler then called the
                # nonexistent `os.exception(...)` (AttributeError, masking the
                # original error); use log.exception and a narrower catch.
                except Exception:
                    output.speak(
                        "An exception was raised while attempting to clean malformed session data. See the error log for details. If this message persists, contact the developers.",
                        True)
                    log.exception(
                        "Exception thrown while removing malformed session"
                    )
    self.view.fill_list(sessionsList)
def LoadConfiguration(self):
    """
    Set the configuration values specified in configuration.json
    Return True if configuration successfully loaded, False otherwise.
    """
    # BUG FIX: the try block previously wrapped only the log.info call, so
    # failures inside setup_config()/load_config() escaped uncaught and the
    # log.critical path was effectively unreachable.
    try:
        setup_config()
        load_config()
        log.info("Loaded configuration")
        return True
    except Exception as e:
        log.critical(f"Failed to load configuration, {e}")
        return False
def get_config_custom():
    """Read the custom configuration file and serve it as JSON for the
    AngularJS service config.service.ts."""
    config = load_config(file_name='config.json')
    if config is None:
        payload = {'error': 'Unable to read config.json'}
    else:
        payload = config
    return make_json_response(json.dumps(payload))
def evaluate_all_datasets(arch, datasets, xpaths, splits, seed, arch_config, workers, logger):
    """Train and evaluate `arch` on every dataset in `datasets`.

    For each (dataset, path, split) triple, builds the appropriate loaders —
    using a held-out split of the training set when `split` is truthy — runs
    evaluate_for_seed, and collects the results keyed by dataset name
    (suffixed '-valid' for split runs). Machine info is stored under 'info'.

    Returns the dict of all results.
    """
    machine_info, arch_config = get_machine_info(), deepcopy(arch_config)
    all_infos = {'info': machine_info}
    all_dataset_keys = []
    # look all the datasets
    for dataset, xpath, split in zip(datasets, xpaths, splits):
        # train valid data
        train_data, valid_data, xshape, class_num = get_datasets(dataset, xpath, -1)
        # load the configurature
        if dataset == 'cifar10' or dataset == 'cifar100':
            config_path = 'configs/nas-benchmark/CIFAR.config'
            split_info = load_config('configs/nas-benchmark/cifar-split.txt', None, None)
        elif dataset.startswith('ImageNet16'):
            config_path = 'configs/nas-benchmark/ImageNet-16.config'
            split_info = load_config('configs/nas-benchmark/{:}-split.txt'.format(dataset), None, None)
        else:
            raise ValueError('invalid dataset : {:}'.format(dataset))
        config = load_config(config_path, {'class_num': class_num, 'xshape': xshape}, logger)
        # check whether use splited validation set
        if bool(split):
            # The split file must partition the full training set.
            assert len(train_data) == len(split_info.train) + len(split_info.valid), 'invalid length : {:} vs {:} + {:}'.format(len(train_data), len(split_info.train), len(split_info.valid))
            # Evaluate on training images under the validation transform.
            train_data_v2 = deepcopy(train_data)
            train_data_v2.transform = valid_data.transform
            valid_data = train_data_v2
            # data loader
            train_loader = torch.utils.data.DataLoader(train_data, batch_size=config.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(split_info.train), num_workers=workers, pin_memory=True)
            valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=config.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(split_info.valid), num_workers=workers, pin_memory=True)
        else:
            # data loader
            train_loader = torch.utils.data.DataLoader(train_data, batch_size=config.batch_size, shuffle=True, num_workers=workers, pin_memory=True)
            valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=config.batch_size, shuffle=False, num_workers=workers, pin_memory=True)
        dataset_key = '{:}'.format(dataset)
        if bool(split):
            dataset_key = dataset_key + '-valid'
        logger.log('Evaluate ||||||| {:10s} ||||||| Train-Num={:}, Valid-Num={:}, Train-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}'.format(dataset_key, len(train_data), len(valid_data), len(train_loader), len(valid_loader), config.batch_size))
        logger.log('Evaluate ||||||| {:10s} ||||||| Config={:}'.format(dataset_key, config))
        results = evaluate_for_seed(arch_config, config, arch, train_loader, valid_loader, seed, logger)
        all_infos[dataset_key] = results
        all_dataset_keys.append( dataset_key )
    all_infos['all_dataset_keys'] = all_dataset_keys
    return all_infos
def get_configuration(self):
    """ Gets settings for a session."""
    file_ = "%s/session.conf" % (self.session_id,)
    log.debug("Creating config file %s" % (file_,))
    session_conf = paths.config_path(file_)
    defaults_spec = paths.app_path("Conf.defaults")
    self.settings = config_utils.load_config(session_conf, defaults_spec)
    self.init_sound()
    self.deshelve()
def show_imagenet_16_120(dataset_dir=None):
    """Print a summary of the ImageNet-16-120 dataset and its hold-out split sizes."""
    if dataset_dir is None:
        if 'TORCH_HOME' in os.environ:
            torch_home_dir = os.environ['TORCH_HOME']
        else:
            torch_home_dir = os.path.join(os.environ['HOME'], '.torch')
        dataset_dir = os.path.join(torch_home_dir, 'cifar.python', 'ImageNet16')
    train_data, valid_data, xshape, class_num = get_datasets(
        'ImageNet16-120', dataset_dir, -1)
    split_info = load_config('configs/nas-benchmark/ImageNet16-120-split.txt', None, None)
    header = '=' * 10 + ' ImageNet-16-120 ' + '=' * 10
    print(header)
    print('Training Data: {:}'.format(train_data))
    print('Evaluation Data: {:}'.format(valid_data))
    print('Hold-out training: {:} images.'.format(len(split_info.train)))
    print('Hold-out valid : {:} images.'.format(len(split_info.valid)))
def get_nas_search_loaders(train_data, valid_data, dataset, config_root, batch_size, workers):
    """Build (search_loader, train_loader, valid_loader) for NAS on the given dataset.

    `batch_size` may be a single int or a (train_batch, test_batch) pair.
    cifar10 searches over a train/valid split of the training set; cifar100 and
    ImageNet16-120 pair the training set with a split of the test set.

    Raises ValueError for unsupported datasets.
    """
    if isinstance(batch_size, (list, tuple)):
        batch, test_batch = batch_size
    else:
        batch, test_batch = batch_size, batch_size
    if dataset == 'cifar10':
        #split_Fpath = 'configs/nas-benchmark/cifar-split.txt'
        cifar_split = load_config('{:}/cifar-split.txt'.format(config_root), None, None)
        # search over the proposed training and validation set;
        # they are two disjoint groups in the original CIFAR-10 training set
        train_split, valid_split = cifar_split.train, cifar_split.valid
        #logger.log('Load split file from {:}'.format(split_Fpath))
        # To split data: validation images come from the training set but use
        # the validation transform.
        xvalid_data = deepcopy(train_data)
        if hasattr(xvalid_data, 'transforms'):  # to avoid a print issue
            xvalid_data.transforms = valid_data.transform
        xvalid_data.transform = deepcopy( valid_data.transform )
        search_data = SearchDataset(dataset, train_data, train_split, valid_split)
        # data loader
        search_loader = torch.utils.data.DataLoader(search_data, batch_size=batch, shuffle=True, num_workers=workers, pin_memory=True)
        train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch, sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split), num_workers=workers, pin_memory=True)
        valid_loader = torch.utils.data.DataLoader(xvalid_data, batch_size=test_batch, sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split), num_workers=workers, pin_memory=True)
    elif dataset == 'cifar100':
        cifar100_test_split = load_config('{:}/cifar100-test-split.txt'.format(config_root), None, None)
        # Pair the full training set with the xvalid portion of the test set
        # (test images re-use the training transform for the search loader).
        search_train_data = train_data
        search_valid_data = deepcopy(valid_data) ; search_valid_data.transform = train_data.transform
        search_data = SearchDataset(dataset, [search_train_data, search_valid_data], list(range(len(search_train_data))), cifar100_test_split.xvalid)
        search_loader = torch.utils.data.DataLoader(search_data, batch_size=batch, shuffle=True, num_workers=workers, pin_memory=True)
        train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch, shuffle=True, num_workers=workers, pin_memory=True)
        valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=test_batch, sampler=torch.utils.data.sampler.SubsetRandomSampler(cifar100_test_split.xvalid), num_workers=workers, pin_memory=True)
    elif dataset == 'ImageNet16-120':
        imagenet_test_split = load_config('{:}/imagenet-16-120-test-split.txt'.format(config_root), None, None)
        # Same scheme as cifar100, with the ImageNet-16-120 test split.
        search_train_data = train_data
        search_valid_data = deepcopy(valid_data) ; search_valid_data.transform = train_data.transform
        search_data = SearchDataset(dataset, [search_train_data, search_valid_data], list(range(len(search_train_data))), imagenet_test_split.xvalid)
        search_loader = torch.utils.data.DataLoader(search_data, batch_size=batch, shuffle=True, num_workers=workers, pin_memory=True)
        train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch, shuffle=True, num_workers=workers, pin_memory=True)
        valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=test_batch, sampler=torch.utils.data.sampler.SubsetRandomSampler(imagenet_test_split.xvalid), num_workers=workers, pin_memory=True)
    else:
        raise ValueError('invalid dataset : {:}'.format(dataset))
    return search_loader, train_loader, valid_loader
def __init__(self, title):
    """Build the uploader main window: load the app config and lay out all controls.

    Several controls (upload, tweet, attach-another) start hidden and are
    presumably revealed by later event handlers — confirm against the rest of
    the class.
    """
    self.recording = False
    wx.Frame.__init__(self, None, title=title, size=(350, 200))  # initialize the wx frame
    # load config and validate a specification file
    self.MAINFILE = "uploader.cfg"
    self.MAINSPEC = "app.defaults"
    self.appconfig = config_utils.load_config(self.MAINFILE, self.MAINSPEC)
    # window events and controls
    self.Bind(wx.EVT_CLOSE, self.OnClose)
    self.panel = wx.Panel(self)
    self.main_box = wx.BoxSizer(wx.VERTICAL)
    self.select_file = wx.Button(self.panel, -1, "&Select File")
    self.select_file.Bind(wx.EVT_BUTTON, self.SelectFile)
    self.main_box.Add(self.select_file, 0, wx.ALL, 10)
    self.record = wx.Button(self.panel, -1, "&Record")
    self.record.Bind(wx.EVT_BUTTON, self.Record)
    self.main_box.Add(self.record, 0, wx.ALL, 10)
    # Read-only field showing the uploaded audio URL once available.
    self.link_label = wx.StaticText(self.panel, -1, "Audio U&RL")
    self.link = wx.TextCtrl(self.panel, -1, "", style=wx.TE_READONLY)
    self.link.SetValue("Waiting for audio...")
    self.main_box.Add(self.link, 0, wx.ALL, 10)
    # API key field, pre-filled from the loaded config.
    self.key_label = wx.StaticText(self.panel, -1, "SNDUp API &Key")
    self.key = wx.TextCtrl(self.panel, -1, "")
    self.main_box.Add(self.key, 0, wx.ALL, 10)
    self.key.SetValue(self.appconfig["general"]["APIKey"])
    # `services` is a name from the enclosing module; first entry is the default.
    self.services_label = wx.StaticText(self.panel, -1, "Upload to")
    self.services = wx.ComboBox(self.panel, -1, choices=services, value=services[0], style=wx.CB_READONLY)
    self.services.Bind(wx.EVT_COMBOBOX, self.on_service_change)
    self.main_box.Add(self.services, 0, wx.ALL, 10)
    self.upload = wx.Button(self.panel, -1, "&Upload")
    self.upload.Bind(wx.EVT_BUTTON, self.OnUpload)
    self.main_box.Add(self.upload, 0, wx.ALL, 10)
    self.upload.Hide()
    self.twitter_label = wx.StaticText(self.panel, -1, "Tweet Te&xt")
    self.twitter_text = wx.TextCtrl(self.panel, -1, "")
    self.main_box.Add(self.twitter_text, 0, wx.ALL, 10)
    self.twitter_text.Hide()
    self.tweet = wx.Button(self.panel, -1, "&Tweet")
    self.tweet.Bind(wx.EVT_BUTTON, self.Tweet)
    self.tweet.Hide()
    self.main_box.Add(self.tweet, 0, wx.ALL, 10)
    self.new = wx.Button(self.panel, -1, "&Attach another file")
    self.new.Bind(wx.EVT_BUTTON, self.Reset)
    self.main_box.Add(self.new, 0, wx.ALL, 10)
    self.new.Hide()
    self.close = wx.Button(self.panel, wx.ID_CLOSE, "&Close")
    self.close.Bind(wx.EVT_BUTTON, self.OnClose)
    self.main_box.Add(self.close, 0, wx.ALL, 10)
    self.panel.Layout()
def main(xargs):
    """Measure per-epoch architecture 'angles' of an SPOS supernet against its
    initial checkpoint.

    Loads the epoch-0 checkpoint as the reference model, then for every epoch
    reloads the checkpoint and computes get_arch_angle between the initial and
    current weights; prints the list of rounded angles.
    """
    assert torch.cuda.is_available(), 'CUDA is not available.'
    # Deterministic cuDNN settings for reproducible runs.
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads( xargs.workers )
    prepare_seed(xargs.rand_seed)
    # NOTE(review): uses the module-level `args`, not `xargs` — presumably the
    # same namespace object; confirm before refactoring.
    logger = prepare_logger(args)

    train_data, valid_data, xshape, class_num = get_datasets(xargs.dataset, xargs.data_path, -1)
    config = load_config(xargs.config_path, {'class_num': class_num, 'xshape': xshape}, logger)
    search_loader, _, valid_loader = get_nas_search_loaders(train_data, valid_data, xargs.dataset, 'configs/nas-benchmark/', (config.batch_size, config.test_batch_size), xargs.workers)
    logger.log('||||||| {:10s} ||||||| Search-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}'.format(xargs.dataset, len(search_loader), len(valid_loader), config.batch_size))
    logger.log('||||||| {:10s} ||||||| Config={:}'.format(xargs.dataset, config))

    search_space = get_search_spaces('cell', xargs.search_space_name)
    model_config = dict2config({'name': 'SPOS', 'C': xargs.channel, 'N': xargs.num_cells, 'max_nodes': xargs.max_nodes, 'num_classes': class_num, 'space': search_space, 'affine': False, 'track_running_stats': bool(xargs.track_running_stats)}, None)
    logger.log('search space : {:}'.format(search_space))
    model = get_cell_based_tiny_net(model_config)
    flop, param = get_model_infos(model, xshape)
    logger.log('FLOP = {:.2f} M, Params = {:.2f} MB'.format(flop, param))
    logger.log('search-space : {:}'.format(search_space))
    if xargs.arch_nas_dataset is None:
        api = None
    else:
        api = API(xargs.arch_nas_dataset)
    logger.log('{:} create API = {:} done'.format(time_string(), api))

    checkpoint_path_template = '{}/checkpoint/seed-{}_epoch-{}.pth'
    logger.log("=> loading checkpoint from {}".format(checkpoint_path_template.format(args.save_dir, args.rand_seed, 0)))
    # Epoch-0 weights serve as the fixed reference for all angle computations.
    load(checkpoint_path_template.format(args.save_dir, args.rand_seed, 0), model)
    init_model = deepcopy(model)
    angles = []
    for epoch in range(xargs.epochs):
        genotype = load(checkpoint_path_template.format(args.save_dir, args.rand_seed, epoch), model)
        # NOTE(review): this log formats the path with args.dataset where the
        # actual load above used args.save_dir — looks like a copy/paste slip; confirm.
        logger.log("=> loading checkpoint from {}".format(checkpoint_path_template.format(args.dataset, args.rand_seed, epoch)))
        cur_model = deepcopy(model)
        angle = get_arch_angle(init_model, cur_model, genotype, search_space)
        logger.log('[{:}] cal angle : angle={}'.format(epoch, angle))
        angle = round(angle, 2)
        angles.append(angle)
    print(angles)
def show_imagenet_16_120(dataset_dir=None):
    """Summarise the ImageNet-16-120 dataset and its hold-out split sizes."""
    if dataset_dir is None:
        if "TORCH_HOME" in os.environ:
            torch_home_dir = os.environ["TORCH_HOME"]
        else:
            torch_home_dir = os.path.join(os.environ["HOME"], ".torch")
        dataset_dir = os.path.join(torch_home_dir, "cifar.python", "ImageNet16")
    loaded = get_datasets("ImageNet16-120", dataset_dir, -1)
    train_data, valid_data, xshape, class_num = loaded
    split_info = load_config("configs/nas-benchmark/ImageNet16-120-split.txt",
                             None, None)
    print("=" * 10 + " ImageNet-16-120 " + "=" * 10)
    print("Training Data: {:}".format(train_data))
    print("Evaluation Data: {:}".format(valid_data))
    print("Hold-out training: {:} images.".format(len(split_info.train)))
    print("Hold-out valid : {:} images.".format(len(split_info.valid)))
def send_to_terra():
    """Send manifest data to Terra data framework. Returns URL to UI that will open in new window."""
    import importlib
    file_to_retrieve = 'terra'  # dummy value to prevent erroring out
    config = load_config(file_name='config.json')
    service = config['cloud-options']['service-name']
    module_name = service + '_handler'  # aws_handler, download_handler
    class_name = service.title() + 'Handler'  # AwsHandler, DownloadHandler
    handler_class = getattr(importlib.import_module(module_name), class_name)
    handler = handler_class(file_to_retrieve=file_to_retrieve)
    file_data = handler.handle_manifest(request)
    metadata = handler.handle_metadata(request)
    json_entities = handler.build_json(file_data, metadata)
    return handler.post_json_to_terra(json_entities)
def download(date_range):
    """Download monthly ERA5 u10/SLP files covering `date_range` via the CDS API.

    Args:
        date_range: pandas DatetimeIndex (validated by check_pandas_datetime_index)
            spanning the months to fetch; files land under config['path']['raw'].
    """
    from utils import download_utils as du
    import os
    import cdsapi
    config = load_config()
    # BUG FIX: the client was bound to `c` but the request below referenced the
    # undefined name `cds_client`, raising NameError on the first iteration.
    cds_client = cdsapi.Client()
    du.check_pandas_datetime_index(date_range)
    local_name = os.path.join(config['path']['raw'], 'era5/{year}/era5_{year}{month:02d}_u10_slp.nc')
    month_range = du.date_range_to_custom_freq(date_range)
    for t in month_range:
        local_path = local_name.format(year=t.year, month=t.month)
        request_ERA5_U10_SLP(cds_client, t.year, t.month, local_path)
def get_cart_metadata():
    """Fetch cart metadata through the configured cloud handler.

    AWS returns a manifest id dict; other services download the file directly.
    """
    import importlib
    file_to_retrieve = 'metadata'
    config = load_config(file_name='config.json')
    service = config['cloud-options']['service-name']
    module_name = service + '_handler'  # aws_handler, download_handler
    class_name = service.title() + 'Handler'  # AwsHandler, DownloadHandler
    handler_class = getattr(importlib.import_module(module_name), class_name)
    handler = handler_class(file_to_retrieve=file_to_retrieve)
    if service.lower() == 'aws':
        # AWS returns a manifest ID {'manifest_id': 'xxx-xxxxxxx-xxxx-xxxxx'}
        return handler.handle_metadata(request)
    # Download the file
    metadata = handler.handle_metadata(request)
    return handler.download_file(request, metadata)
def fill_list(self):
    """List valid sessions from the config directory in the view; remove
    malformed session directories (empty twitter credentials)."""
    sessionsList = []
    log.debug("Filling the sessions list.")
    self.sessions = []
    for i in os.listdir(paths.config_path()):
        if os.path.isdir(paths.config_path(i)):
            log.debug("Adding session %s" % (i,))
            strconfig = "%s/session.conf" % (paths.config_path(i))
            config_test = config_utils.load_config(strconfig)
            name = config_test["twitter"]["user_name"]
            if config_test["twitter"]["user_key"] != "" and config_test["twitter"]["user_secret"] != "":
                sessionsList.append(name)
                self.sessions.append(i)
            else:
                try:
                    log.debug("Deleting session %s" % (i,))
                    shutil.rmtree(paths.config_path(i))
                # BUG FIX: was a bare `except:` whose handler called the
                # nonexistent `os.exception(...)` (AttributeError, masking the
                # original error); use log.exception and a narrower catch.
                except Exception:
                    output.speak("An exception was raised while attempting to clean malformed session data. See the error log for details. If this message persists, contact the developers.",True)
                    log.exception("Exception thrown while removing malformed session")
    self.view.fill_list(sessionsList)
def main(bam_file, config_file=None, chrom='all', start=0, end=None,
         outfile=None, normalize=False, use_tempfile=False):
    """Convert a BAM file to a bigwig coverage track.

    Writes an intermediate wiggle file for the selected region and converts it
    with UCSC wigToBigWig; the wig is removed afterwards. Does nothing if
    `outfile` already exists and is non-empty.

    Args:
        bam_file: input BAM path; output defaults to the same basename + '.bigwig'.
        config_file: optional config providing the wigToBigWig program location.
        chrom/start/end: region selection; start/end may arrive as strings from a CLI.
        normalize: forwarded to write_bam_track.
        use_tempfile: write the wig to a temp file instead of next to `outfile`.
    """
    if config_file:
        config = load_config(config_file)
    else:
        config = {"program": {"ucsc_bigwig": "wigToBigWig"}}
    if outfile is None:
        outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
    # BUG FIX: `start` may be a string when invoked from the command line; the
    # old `if start > 0` then compared str to int (TypeError on Python 3).
    # Convert first, then shift (presumably 1-based inclusive -> 0-based; confirm).
    start = int(start)
    if start > 0:
        start -= 1
    if end is not None:
        end = int(end)
    regions = [(chrom, start, end)]
    if os.path.abspath(bam_file) == os.path.abspath(outfile):
        sys.stderr.write(
            "Bad arguments, input and output files are the same.\n")
        sys.exit(1)
    if not (os.path.exists(outfile) and os.path.getsize(outfile) > 0):
        if use_tempfile:
            # Use a temp file to avoid any possiblity of not having write permission
            out_handle = tempfile.NamedTemporaryFile(delete=False)
            wig_file = out_handle.name
        else:
            wig_file = "%s.wig" % os.path.splitext(outfile)[0]
            out_handle = open(wig_file, "w")
        with closing(out_handle):
            chr_sizes, wig_valid = write_bam_track(bam_file, regions, config,
                                                   out_handle, normalize)
        try:
            if wig_valid:
                convert_to_bigwig(wig_file, chr_sizes, config, outfile)
        finally:
            # Always clean up the intermediate wig, even if conversion fails.
            os.remove(wig_file)
def evaluate_line():
    """Interactively classify sentences with the latest trained checkpoint.

    Restores the model from the newest checkpoint, then loops forever reading
    sentences from stdin, mapping characters to vocab ids, padding to length
    600, and printing the predicted category.
    """
    config_path = os.path.join(FLAGS.config_path, 'config')
    test_config = load_config(config_path)
    _, word_to_id = read_vocab(test_config['vocab_file'])
    categorys, cat_to_id = read_category()
    # NOTE(review): contents/labels are loaded but unused here — confirm whether
    # this read is needed.
    contents, labels = read_file('data/cnews.val2.txt')
    model = Model(test_config)
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    # load the trained model
    checkpoint_path = os.path.join(FLAGS.checkpoints_path)
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_path)
    saver.restore(session, checkpoint_file)
    while True:
        line = input("请输入测试句子:")
        # Drop characters missing from the vocabulary.
        x_input = [[word_to_id[x] for x in line if x in word_to_id]]
        x_pad = kr.preprocessing.sequence.pad_sequences(x_input, 600)
        predict = model.evaluate(session, x_pad)
        print(categorys[predict[0][0]])
def __init__(self, cfg):
    """Initialise the grasp estimator from either a logdir path or a ready config.

    If `cfg` carries 'surface_grasp_logdir_folder', the contact-grasp config is
    loaded from that folder (sim-evaluation path); otherwise `cfg` is assumed
    to already be the contact-grasp config dict.
    """
    if 'surface_grasp_logdir_folder' in cfg:
        # for sim evaluation
        self._contact_grasp_cfg = config_utils.load_config(
            cfg['surface_grasp_logdir_folder'],
            batch_size=1,
            arg_configs=cfg.arg_configs)
        self._cfg = cfg
        self._num_samples = self._cfg.num_samples
    else:
        self._contact_grasp_cfg = cfg

    # Resolve the model module named in the config, e.g. its MODEL.model entry.
    self._model_func = importlib.import_module(
        self._contact_grasp_cfg['MODEL']['model'])
    # Prefer DATA.raw_num_points when present; fall back to DATA.num_point.
    self._num_input_points = self._contact_grasp_cfg['DATA'][
        'raw_num_points'] if 'raw_num_points' in self._contact_grasp_cfg[
            'DATA'] else self._contact_grasp_cfg['DATA']['num_point']
    self.placeholders = self._model_func.placeholder_inputs(
        self._contact_grasp_cfg['OPTIMIZER']['batch_size'],
        self._num_input_points,
        self._contact_grasp_cfg['DATA']['input_normals'])
    # Populated later with the built TF model operations.
    self.model_ops = {}
def get_projects_visualization_data():
    """Build the JSON payload for the projects bar-graph visualization.

    Aggregates `bar_graph_data` rows into x-axis buckets (per the configured
    barchart columns) and returns hits, aggregations, and pagination info.
    """
    graph_type = request.args.get('graph_type')
    if graph_type == 'bar_graph':
        graph_data_length = len(bar_graph_data)
        p_str = "{{ \"count\": {0}, \"sort\": \"\", \"from\": 1, \"page\": 1, \"total\": {1}, \"pages\": 1, \"size\": 100 }}".format(graph_data_length, graph_data_length)
        counts = {}
        hit_list = []
        config = load_config(file_name='config.json')
        for row in bar_graph_data:
            bar_segment_tip = row[config['search']['barchart-config']['data-tooltip-label']]
            x_labels = row[config['search']['barchart-config']['x-axis-labels']]
            n_files = row["file_count"]
            n_cases = row["case_count"]
            if x_labels is None:
                x_labels = "None"
            counts[x_labels] = counts.get(x_labels, 0) + 1
            hit_list.append({"x_axis": x_labels, "bar_segment_tip": bar_segment_tip, "summary": {"case_count": n_cases, "file_count": n_files}})
        buckets_list = [{"key": ckey, "doc_count": ccount} for ckey, ccount in counts.items()]
        buckets_str = json.dumps(buckets_list)
        hit_str = json.dumps(hit_list)
        agg_str = "{{ \"x_labels\": {{ \"buckets\": {0} }}}}".format(buckets_str)
        # BUG FIX: "pagination" previously re-used placeholder {0} (the
        # aggregations blob) and the prepared p_str was never used; pagination
        # now carries p_str via placeholder {2}.
        data = ("{{\"data\" : {{\"aggregations\": {0}, \"hits\" : {1}, \"pagination\": {2}}}, \"warnings\": {{}}}}".format(agg_str, hit_str, p_str))
        return make_json_response(data)
def main(xargs, nas_bench):
    """Run a BOHB architecture search over NAS-Bench-201 and report the best.

    Sets up deterministic cuDNN, builds (optional) CIFAR data loaders, starts
    a hpbandster name server plus one worker that scores architectures via
    ``nas_bench``, runs BOHB, then queries the benchmark for the incumbent.

    Returns:
        (logger.log_dir, benchmark index of the best architecture,
         wall-clock search time in seconds).
    """
    assert torch.cuda.is_available(), "CUDA is not available."
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    # Deterministic cuDNN so repeated runs with the same seed agree.
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(xargs.workers)
    prepare_seed(xargs.rand_seed)
    # NOTE(review): this reads the module-level `args`, not the `xargs`
    # parameter -- looks intentional for this script family, but confirm.
    logger = prepare_logger(args)
    # NAS-Bench-201 stores CIFAR-10 search results under 'cifar10-valid'.
    if xargs.dataset == "cifar10":
        dataname = "cifar10-valid"
    else:
        dataname = xargs.dataset
    if xargs.data_path is not None:
        # Real data available: build train/valid loaders from the benchmark
        # split file (train half / valid half of the CIFAR training set).
        train_data, valid_data, xshape, class_num = get_datasets(
            xargs.dataset, xargs.data_path, -1)
        split_Fpath = "configs/nas-benchmark/cifar-split.txt"
        cifar_split = load_config(split_Fpath, None, None)
        train_split, valid_split = cifar_split.train, cifar_split.valid
        logger.log("Load split file from {:}".format(split_Fpath))
        config_path = "configs/nas-benchmark/algos/R-EA.config"
        config = load_config(config_path, {
            "class_num": class_num,
            "xshape": xshape
        }, logger)
        # To split data: the "valid" loader uses train images with the
        # validation-time transform (no augmentation).
        train_data_v2 = deepcopy(train_data)
        train_data_v2.transform = valid_data.transform
        valid_data = train_data_v2
        search_data = SearchDataset(xargs.dataset, train_data, train_split,
                                    valid_split)
        # data loader
        train_loader = torch.utils.data.DataLoader(
            train_data,
            batch_size=config.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split),
            num_workers=xargs.workers,
            pin_memory=True,
        )
        valid_loader = torch.utils.data.DataLoader(
            valid_data,
            batch_size=config.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split),
            num_workers=xargs.workers,
            pin_memory=True,
        )
        logger.log(
            "||||||| {:10s} ||||||| Train-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}"
            .format(xargs.dataset, len(train_loader), len(valid_loader),
                    config.batch_size))
        logger.log("||||||| {:10s} ||||||| Config={:}".format(
            xargs.dataset, config))
        extra_info = {
            "config": config,
            "train_loader": train_loader,
            "valid_loader": valid_loader,
        }
    else:
        # Benchmark-only mode: no raw data, scores come from nas_bench.
        config_path = "configs/nas-benchmark/algos/R-EA.config"
        config = load_config(config_path, None, logger)
        logger.log("||||||| {:10s} ||||||| Config={:}".format(
            xargs.dataset, config))
        extra_info = {
            "config": config,
            "train_loader": None,
            "valid_loader": None
        }
    # nas dataset load
    assert xargs.arch_nas_dataset is not None and os.path.isfile(
        xargs.arch_nas_dataset)
    search_space = get_search_spaces("cell", xargs.search_space_name)
    # ConfigSpace over cell operations; config2structure maps a sampled
    # configuration back to an architecture object.
    cs = get_configuration_space(xargs.max_nodes, search_space)
    config2structure = config2structure_func(xargs.max_nodes)
    hb_run_id = "0"
    # hpbandster name server on a free local port.
    NS = hpns.NameServer(run_id=hb_run_id, host="localhost", port=0)
    ns_host, ns_port = NS.start()
    num_workers = 1
    # nas_bench = AANASBenchAPI(xargs.arch_nas_dataset)
    # logger.log('{:} Create NAS-BENCH-API DONE'.format(time_string()))
    workers = []
    for i in range(num_workers):
        w = MyWorker(
            nameserver=ns_host,
            nameserver_port=ns_port,
            convert_func=config2structure,
            dataname=dataname,
            nas_bench=nas_bench,
            time_budget=xargs.time_budget,
            run_id=hb_run_id,
            id=i,
        )
        w.run(background=True)
        workers.append(w)
    start_time = time.time()
    bohb = BOHB(
        configspace=cs,
        run_id=hb_run_id,
        eta=3,
        min_budget=12,
        max_budget=200,
        nameserver=ns_host,
        nameserver_port=ns_port,
        num_samples=xargs.num_samples,
        random_fraction=xargs.random_fraction,
        bandwidth_factor=xargs.bandwidth_factor,
        ping_interval=10,
        min_bandwidth=xargs.min_bandwidth,
    )
    results = bohb.run(xargs.n_iters, min_n_workers=num_workers)
    bohb.shutdown(shutdown_workers=True)
    NS.shutdown()
    real_cost_time = time.time() - start_time
    # Incumbent = best configuration found by BOHB.
    id2config = results.get_id2config_mapping()
    incumbent = results.get_incumbent_id()
    logger.log("Best found configuration: {:} within {:.3f} s".format(
        id2config[incumbent]["config"], real_cost_time))
    best_arch = config2structure(id2config[incumbent]["config"])
    # Query full (200-epoch) benchmark statistics for the chosen cell.
    info = nas_bench.query_by_arch(best_arch, "200")
    if info is None:
        logger.log("Did not find this architecture : {:}.".format(best_arch))
    else:
        logger.log("{:}".format(info))
    logger.log("-" * 100)
    logger.log("workers : {:.1f}s with {:} archs".format(
        workers[0].time_budget, len(workers[0].seen_archs)))
    logger.close()
    return logger.log_dir, nas_bench.query_index_by_arch(
        best_arch), real_cost_time
def main(args):
    """Differentiable shape search: alternate weight/architecture training.

    Builds data loaders from an explicit train/valid split file, constructs a
    searchable model plus separate base (weights) and architecture (Adam)
    optimizers, optionally resumes from a checkpoint, then trains with a
    Gumbel-temperature schedule under a FLOP-budget penalty.  Per-epoch
    genotypes, validation accuracies, and FLOP discrepancies are checkpointed;
    the final best architecture (within FLOP tolerance) is written out.
    """
    assert torch.cuda.is_available(), "CUDA is not available."
    torch.backends.cudnn.enabled = True
    # benchmark=True trades determinism for speed (deterministic left off).
    torch.backends.cudnn.benchmark = True
    # torch.backends.cudnn.deterministic = True
    torch.set_num_threads(args.workers)
    prepare_seed(args.rand_seed)
    logger = prepare_logger(args)
    # prepare dataset
    train_data, valid_data, xshape, class_num = get_datasets(
        args.dataset, args.data_path, args.cutout_length)
    # train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True , num_workers=args.workers, pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_data,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
    )
    # Load the fixed train/valid index split and sanity-check it covers the
    # training set exactly once with no overlap.
    split_file_path = Path(args.split_path)
    assert split_file_path.exists(), "{:} does not exist".format(
        split_file_path)
    split_info = torch.load(split_file_path)
    train_split, valid_split = split_info["train"], split_info["valid"]
    assert (len(set(train_split).intersection(set(valid_split))) == 0
            ), "There should be 0 element that belongs to both train and valid"
    assert len(train_split) + len(valid_split) == len(
        train_data), "{:} + {:} vs {:}".format(len(train_split),
                                               len(valid_split),
                                               len(train_data))
    search_dataset = SearchDataset(args.dataset, train_data, train_split,
                                   valid_split)
    # Separate loaders: train/valid halves of the training data for the
    # bi-level search, plus a paired search loader over both.
    search_train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split),
        pin_memory=True,
        num_workers=args.workers,
    )
    search_valid_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split),
        pin_memory=True,
        num_workers=args.workers,
    )
    search_loader = torch.utils.data.DataLoader(
        search_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        sampler=None,
    )
    # get configures
    model_config = load_config(
        args.model_config,
        {
            "class_num": class_num,
            "search_mode": args.search_shape
        },
        logger,
    )
    # obtain the model
    search_model = obtain_search_model(model_config)
    # MAX_FLOP is the budget reference for the FLOP penalty below.
    MAX_FLOP, param = get_model_infos(search_model, xshape)
    optim_config = load_config(args.optim_config, {
        "class_num": class_num,
        "FLOP": MAX_FLOP
    }, logger)
    logger.log("Model Information : {:}".format(search_model.get_message()))
    logger.log("MAX_FLOP = {:} M".format(MAX_FLOP))
    logger.log("Params = {:} M".format(param))
    logger.log("train_data : {:}".format(train_data))
    logger.log("search-data: {:}".format(search_dataset))
    logger.log("search_train_loader : {:} samples".format(len(train_split)))
    logger.log("search_valid_loader : {:} samples".format(len(valid_split)))
    # Base optimizer/scheduler for network weights; Adam for architecture
    # parameters (standard bi-level setup).
    base_optimizer, scheduler, criterion = get_optim_scheduler(
        search_model.base_parameters(), optim_config)
    arch_optimizer = torch.optim.Adam(
        search_model.arch_parameters(),
        lr=optim_config.arch_LR,
        betas=(0.5, 0.999),
        weight_decay=optim_config.arch_decay,
    )
    logger.log("base-optimizer : {:}".format(base_optimizer))
    logger.log("arch-optimizer : {:}".format(arch_optimizer))
    logger.log("scheduler : {:}".format(scheduler))
    logger.log("criterion : {:}".format(criterion))
    last_info, model_base_path, model_best_path = (
        logger.path("info"),
        logger.path("model"),
        logger.path("best"),
    )
    network, criterion = torch.nn.DataParallel(
        search_model).cuda(), criterion.cuda()
    # load checkpoint
    if last_info.exists() or (args.resume is not None and osp.isfile(
            args.resume)):  # automatically resume from previous checkpoint
        if args.resume is not None and osp.isfile(args.resume):
            resume_path = Path(args.resume)
        elif last_info.exists():
            resume_path = last_info
        else:
            raise ValueError("Something is wrong.")
        logger.log("=> loading checkpoint of the last-info '{:}' start".format(
            resume_path))
        checkpoint = torch.load(resume_path)
        if "last_checkpoint" in checkpoint:
            # The info file points at the real checkpoint; if its absolute
            # path moved, retry relative to the resume location.
            last_checkpoint_path = checkpoint["last_checkpoint"]
            if not last_checkpoint_path.exists():
                logger.log("Does not find {:}, try another path".format(
                    last_checkpoint_path))
                last_checkpoint_path = (resume_path.parent /
                                        last_checkpoint_path.parent.name /
                                        last_checkpoint_path.name)
            assert (last_checkpoint_path.exists()
                    ), "can not find the checkpoint from {:}".format(
                        last_checkpoint_path)
            checkpoint = torch.load(last_checkpoint_path)
        start_epoch = checkpoint["epoch"] + 1
        search_model.load_state_dict(checkpoint["search_model"])
        scheduler.load_state_dict(checkpoint["scheduler"])
        base_optimizer.load_state_dict(checkpoint["base_optimizer"])
        arch_optimizer.load_state_dict(checkpoint["arch_optimizer"])
        valid_accuracies = checkpoint["valid_accuracies"]
        arch_genotypes = checkpoint["arch_genotypes"]
        discrepancies = checkpoint["discrepancies"]
        logger.log(
            "=> loading checkpoint of the last-info '{:}' start with {:}-th epoch."
            .format(resume_path, start_epoch))
    else:
        logger.log(
            "=> do not find the last-info file : {:} or resume : {:}".format(
                last_info, args.resume))
        start_epoch, valid_accuracies, arch_genotypes, discrepancies = (
            0,
            {
                "best": -1
            },
            {},
            {},
        )
    # main procedure
    train_func, valid_func = get_procedures(args.procedure)
    total_epoch = optim_config.epochs + optim_config.warmup
    start_time, epoch_time = time.time(), AverageMeter()
    for epoch in range(start_epoch, total_epoch):
        scheduler.update(epoch, 0.0)
        # Anneal the Gumbel-softmax temperature linearly over training.
        search_model.set_tau(args.gumbel_tau_max, args.gumbel_tau_min,
                             epoch * 1.0 / total_epoch)
        need_time = "Time Left: {:}".format(
            convert_secs2time(epoch_time.avg * (total_epoch - epoch), True))
        epoch_str = "epoch={:03d}/{:03d}".format(epoch, total_epoch)
        LRs = scheduler.get_lr()
        find_best = False
        logger.log(
            "\n***{:s}*** start {:s} {:s}, LR=[{:.6f} ~ {:.6f}], scheduler={:}, tau={:}, FLOP={:.2f}"
            .format(
                time_string(),
                epoch_str,
                need_time,
                min(LRs),
                max(LRs),
                scheduler,
                search_model.tau,
                MAX_FLOP,
            ))
        # train for one epoch; the dict carries the FLOP-penalty targets.
        train_base_loss, train_arch_loss, train_acc1, train_acc5 = train_func(
            search_loader,
            network,
            criterion,
            scheduler,
            base_optimizer,
            arch_optimizer,
            optim_config,
            {
                "epoch-str": epoch_str,
                "FLOP-exp": MAX_FLOP * args.FLOP_ratio,
                "FLOP-weight": args.FLOP_weight,
                "FLOP-tolerant": MAX_FLOP * args.FLOP_tolerant,
            },
            args.print_freq,
            logger,
        )
        # log the results
        logger.log(
            "***{:s}*** TRAIN [{:}] base-loss = {:.6f}, arch-loss = {:.6f}, accuracy-1 = {:.2f}, accuracy-5 = {:.2f}"
            .format(
                time_string(),
                epoch_str,
                train_base_loss,
                train_arch_loss,
                train_acc1,
                train_acc5,
            ))
        # Derive the current discrete genotype and its estimated FLOPs.
        cur_FLOP, genotype = search_model.get_flop("genotype",
                                                   model_config._asdict(),
                                                   None)
        arch_genotypes[epoch] = genotype
        arch_genotypes["last"] = genotype
        logger.log("[{:}] genotype : {:}".format(epoch_str, genotype))
        arch_info, discrepancy = search_model.get_arch_info()
        logger.log(arch_info)
        discrepancies[epoch] = discrepancy
        logger.log(
            "[{:}] FLOP : {:.2f} MB, ratio : {:.4f}, Expected-ratio : {:.4f}, Discrepancy : {:.3f}"
            .format(
                epoch_str,
                cur_FLOP,
                cur_FLOP / MAX_FLOP,
                args.FLOP_ratio,
                np.mean(discrepancy),
            ))
        # if cur_FLOP/MAX_FLOP > args.FLOP_ratio:
        #   init_flop_weight = init_flop_weight * args.FLOP_decay
        # else:
        #   init_flop_weight = init_flop_weight / args.FLOP_decay
        # evaluate the performance
        if (epoch % args.eval_frequency == 0) or (epoch + 1 == total_epoch):
            logger.log("-" * 150)
            valid_loss, valid_acc1, valid_acc5 = valid_func(
                search_valid_loader,
                network,
                criterion,
                epoch_str,
                args.print_freq_eval,
                logger,
            )
            valid_accuracies[epoch] = valid_acc1
            logger.log(
                "***{:s}*** VALID [{:}] loss = {:.6f}, accuracy@1 = {:.2f}, accuracy@5 = {:.2f} | Best-Valid-Acc@1={:.2f}, Error@1={:.2f}"
                .format(
                    time_string(),
                    epoch_str,
                    valid_loss,
                    valid_acc1,
                    valid_acc5,
                    valid_accuracies["best"],
                    100 - valid_accuracies["best"],
                ))
            if valid_acc1 > valid_accuracies["best"]:
                valid_accuracies["best"] = valid_acc1
                arch_genotypes["best"] = genotype
                find_best = True
                logger.log(
                    "Currently, the best validation accuracy found at {:03d}-epoch :: acc@1={:.2f}, acc@5={:.2f}, error@1={:.2f}, error@5={:.2f}, save into {:}."
                    .format(
                        epoch,
                        valid_acc1,
                        valid_acc5,
                        100 - valid_acc1,
                        100 - valid_acc5,
                        model_best_path,
                    ))
        # save checkpoint: full training state plus search history.
        save_path = save_checkpoint(
            {
                "epoch": epoch,
                "args": deepcopy(args),
                "valid_accuracies": deepcopy(valid_accuracies),
                "model-config": model_config._asdict(),
                "optim-config": optim_config._asdict(),
                "search_model": search_model.state_dict(),
                "scheduler": scheduler.state_dict(),
                "base_optimizer": base_optimizer.state_dict(),
                "arch_optimizer": arch_optimizer.state_dict(),
                "arch_genotypes": arch_genotypes,
                "discrepancies": discrepancies,
            },
            model_base_path,
            logger,
        )
        if find_best:
            copy_checkpoint(model_base_path, model_best_path, logger)
        last_info = save_checkpoint(
            {
                "epoch": epoch,
                "args": deepcopy(args),
                "last_checkpoint": save_path,
            },
            logger.path("info"),
            logger,
        )
        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
    logger.log("")
    logger.log("-" * 100)
    last_config_path = logger.path("log") / "seed-{:}-last.config".format(
        args.rand_seed)
    configure2str(arch_genotypes["last"], str(last_config_path))
    logger.log("save the last config int {:} :\n{:}".format(
        last_config_path, arch_genotypes["last"]))
    # Pick the best architecture whose FLOP ratio is within tolerance of the
    # requested budget (not necessarily the global-best accuracy).
    best_arch, valid_acc = arch_genotypes["best"], valid_accuracies["best"]
    for key, config in arch_genotypes.items():
        if key == "last":
            continue
        FLOP_ratio = config["estimated_FLOP"] / MAX_FLOP
        if abs(FLOP_ratio - args.FLOP_ratio) <= args.FLOP_tolerant:
            if valid_acc < valid_accuracies[key]:
                best_arch, valid_acc = config, valid_accuracies[key]
    print("Best-Arch : {:}\nRatio={:}, Valid-ACC={:}".format(
        best_arch, best_arch["estimated_FLOP"] / MAX_FLOP, valid_acc))
    best_config_path = logger.path("log") / "seed-{:}-best.config".format(
        args.rand_seed)
    configure2str(best_arch, str(best_config_path))
    logger.log("save the last config int {:} :\n{:}".format(
        best_config_path, best_arch))
    logger.log("\n" + "-" * 200)
    logger.log(
        "Finish training/validation in {:}, and save final checkpoint into {:}"
        .format(convert_secs2time(epoch_time.sum, True), logger.path("info")))
    logger.close()
from config_utils import load_config

# Project configuration loaded once at import time and shared by every
# function in this module.
config = load_config()


def main():
    """Run each configured processing step for the Holte MLD dataset.

    Builds a daily date range from config['mld_holte']['start'/'end'] and
    dispatches to every function named in config['mld_holte']['functions'],
    looked up in this module's globals and called with the date range.
    """
    import pandas as pd
    date_range = pd.date_range(config['mld_holte']['start'],
                               config['mld_holte']['end'],
                               freq="1D")
    for func_name in config['mld_holte']['functions']:
        # Dispatch by name: each entry must be a top-level function here.
        func = globals()[func_name]
        func(date_range)


def download(*args):
    """Fetch the Holte Argo monthly mixed-layer climatology NetCDF.

    Downloads via wget into config['path']['raw']/mld_holte/.  Extra
    positional args (e.g. the date range passed by main) are ignored.
    """
    from utils import download_utils as dl
    # NOTE(review): pd_date_range is imported but unused here -- left in
    # place since a doc-only update must not change code.
    from pandas import date_range as pd_date_range
    import os
    remote_path = "http://mixedlayer.ucsd.edu/data/Argo_mixedlayers_monthlyclim_05092018.nc"
    local_path = os.path.join(
        config['path']['raw'],
        "mld_holte/Argo_mixedlayers_monthlyclim_05092018.nc")
    dl.download_wget(remote_path, local_path, verbose=True)


def preprocessor_mld_holte_025d(xds, date=None):
    # NOTE(review): only the imports of this function are visible in this
    # chunk -- the remainder of its body appears truncated here.
    from utils import xarray_tools as xt
    import astropy.convolution as conv
def main(xargs):
    """DARTS-style cell search on NAS-Bench-201 with optional alpha perturbation.

    Prepares the save directory (interactively erasing an existing one),
    builds search/valid loaders and a searchable cell network, alternates
    weight (SGD+scheduler) and architecture (Adam) updates via
    ``search_func``, logs per-edge alpha values and ground-truth benchmark
    accuracies to TensorBoard, and checkpoints genotypes each epoch until
    ``xargs.early_stop_epoch``.

    Bug fixed versus the previous version: ``default=False`` was passed to
    ``str.format`` (where it was silently ignored) instead of to
    ``click.confirm``; the prompt therefore defaulted to click's own default.
    It is now passed to ``click.confirm`` so "erase?" defaults to No.
    """
    assert torch.cuda.is_available(), 'CUDA is not available.'
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    # Deterministic cuDNN for reproducible searches.
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(xargs.workers)
    prepare_seed(xargs.rand_seed)
    if os.path.isdir(xargs.save_dir):
        # FIX: `default=False` now belongs to click.confirm, not str.format.
        if click.confirm(
                '\nSave directory already exists in {}. Erase?'.format(
                    xargs.save_dir),
                default=False):
            os.system('rm -r ' + xargs.save_dir)
            # If the user declines, this assert aborts the run (preserved
            # behavior: refusing to erase means we cannot proceed).
    assert not os.path.exists(xargs.save_dir)
    os.mkdir(xargs.save_dir)
    # NOTE(review): `prepare_logger(args)` reads the module-level `args`,
    # not `xargs` -- kept as-is; confirm intentional.
    logger = prepare_logger(args)
    writer = SummaryWriter(xargs.save_dir)
    perturb_alpha = None
    if xargs.perturb:
        # Random alpha perturbation (SDARTS-style smoothing).
        perturb_alpha = random_alpha
    train_data, valid_data, xshape, class_num = get_datasets(
        xargs.dataset, xargs.data_path, -1)
    # config_path = 'configs/nas-benchmark/algos/DARTS.config'
    config = load_config(xargs.config_path, {
        'class_num': class_num,
        'xshape': xshape
    }, logger)
    search_loader, _, valid_loader = get_nas_search_loaders(
        train_data, valid_data, xargs.dataset, 'configs/nas-benchmark/',
        config.batch_size, xargs.workers)
    logger.log(
        '||||||| {:10s} ||||||| Search-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}'
        .format(xargs.dataset, len(search_loader), len(valid_loader),
                config.batch_size))
    logger.log('||||||| {:10s} ||||||| Config={:}'.format(
        xargs.dataset, config))
    search_space = get_search_spaces('cell', xargs.search_space_name)
    # Model config either assembled from CLI flags or loaded from file.
    if xargs.model_config is None:
        model_config = dict2config(
            {
                'name': xargs.model,
                'C': xargs.channel,
                'N': xargs.num_cells,
                'max_nodes': xargs.max_nodes,
                'num_classes': class_num,
                'space': search_space,
                'affine': bool(xargs.affine),
                'track_running_stats': bool(xargs.track_running_stats)
            }, None)
    else:
        model_config = load_config(
            xargs.model_config, {
                'num_classes': class_num,
                'space': search_space,
                'affine': bool(xargs.affine),
                'track_running_stats': bool(xargs.track_running_stats)
            }, None)
    search_model = get_cell_based_tiny_net(model_config)
    # logger.log('search-model :\n{:}'.format(search_model))
    # Separate optimizers: scheduler-driven for weights, Adam for alphas.
    w_optimizer, w_scheduler, criterion = get_optim_scheduler(
        search_model.get_weights(), config, xargs.weight_learning_rate)
    a_optimizer = torch.optim.Adam(search_model.get_alphas(),
                                   lr=xargs.arch_learning_rate,
                                   betas=(0.5, 0.999),
                                   weight_decay=xargs.arch_weight_decay)
    logger.log('w-optimizer : {:}'.format(w_optimizer))
    logger.log('a-optimizer : {:}'.format(a_optimizer))
    logger.log('w-scheduler : {:}'.format(w_scheduler))
    logger.log('criterion   : {:}'.format(criterion))
    flop, param = get_model_infos(search_model, xshape)
    # logger.log('{:}'.format(search_model))
    logger.log('FLOP = {:.2f} M, Params = {:.2f} MB'.format(flop, param))
    if xargs.arch_nas_dataset is None:
        api = None
    else:
        api = API(xargs.arch_nas_dataset)
    logger.log('{:} create API = {:} done'.format(time_string(), api))
    last_info, model_base_path, model_best_path = logger.path(
        'info'), logger.path('model'), logger.path('best')
    network, criterion = torch.nn.DataParallel(
        search_model).cuda(), criterion.cuda()
    if last_info.exists():  # automatically resume from previous checkpoint
        logger.log("=> loading checkpoint of the last-info '{:}' start".format(
            last_info))
        last_info = torch.load(last_info)
        start_epoch = last_info['epoch']
        checkpoint = torch.load(last_info['last_checkpoint'])
        genotypes = checkpoint['genotypes']
        valid_accuracies = checkpoint['valid_accuracies']
        search_model.load_state_dict(checkpoint['search_model'])
        w_scheduler.load_state_dict(checkpoint['w_scheduler'])
        w_optimizer.load_state_dict(checkpoint['w_optimizer'])
        a_optimizer.load_state_dict(checkpoint['a_optimizer'])
        logger.log(
            "=> loading checkpoint of the last-info '{:}' start with {:}-th epoch."
            .format(last_info, start_epoch))
    else:
        logger.log("=> do not find the last-info file : {:}".format(last_info))
        start_epoch, valid_accuracies, genotypes = 0, {
            'best': -1
        }, {
            -1: search_model.genotype()
        }
    # start training
    start_time, search_time, epoch_time = time.time(), AverageMeter(
    ), AverageMeter()
    total_epoch = config.epochs + config.warmup
    assert 0 < xargs.early_stop_epoch <= total_epoch - 1
    for epoch in range(start_epoch, total_epoch):
        if epoch >= xargs.early_stop_epoch:
            logger.log(f"Early stop @ {epoch} epoch.")
            break
        if xargs.perturb:
            # Linearly ramp the perturbation magnitude over training.
            epsilon_alpha = 0.03 + (xargs.epsilon_alpha -
                                    0.03) * epoch / total_epoch
            logger.log(f'epoch {epoch} epsilon_alpha {epsilon_alpha}')
        else:
            epsilon_alpha = None
        w_scheduler.update(epoch, 0.0)
        need_time = 'Time Left: {:}'.format(
            convert_secs2time(epoch_time.val * (total_epoch - epoch), True))
        epoch_str = '{:03d}-{:03d}'.format(epoch, total_epoch)
        logger.log('\n[Search the {:}-th epoch] {:}, LR={:}'.format(
            epoch_str, need_time, min(w_scheduler.get_lr())))
        search_w_loss, search_w_top1, search_w_top5, search_a_loss, search_a_top1, search_a_top5 = search_func(
            search_loader, network, criterion, w_scheduler, w_optimizer,
            a_optimizer, epoch_str, xargs.print_freq, logger,
            xargs.gradient_clip, perturb_alpha, epsilon_alpha)
        search_time.update(time.time() - start_time)
        logger.log(
            '[{:}] searching : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%, time-cost={:.1f} s'
            .format(epoch_str, search_w_loss, search_w_top1, search_w_top5,
                    search_time.sum))
        valid_a_loss, valid_a_top1, valid_a_top5 = valid_func(
            valid_loader, network, criterion)
        # TensorBoard: search-phase and evaluation metrics per epoch.
        writer.add_scalar('search/weight_loss', search_w_loss, epoch)
        writer.add_scalar('search/weight_top1_acc', search_w_top1, epoch)
        writer.add_scalar('search/weight_top5_acc', search_w_top5, epoch)
        writer.add_scalar('search/arch_loss', search_a_loss, epoch)
        writer.add_scalar('search/arch_top1_acc', search_a_top1, epoch)
        writer.add_scalar('search/arch_top5_acc', search_a_top5, epoch)
        writer.add_scalar('evaluate/loss', valid_a_loss, epoch)
        writer.add_scalar('evaluate/top1_acc', valid_a_top1, epoch)
        writer.add_scalar('evaluate/top5_acc', valid_a_top5, epoch)
        logger.log(
            '[{:}] evaluate  : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%'
            .format(epoch_str, valid_a_loss, valid_a_top1, valid_a_top5))
        writer.add_scalar('entropy', search_model.entropy, epoch)
        per_edge_dict = get_per_egde_value_dict(search_model.arch_parameters)
        for edge_name, edge_val in per_edge_dict.items():
            writer.add_scalars(f"cell/{edge_name}", edge_val, epoch)
        # check the best accuracy
        valid_accuracies[epoch] = valid_a_top1
        if valid_a_top1 > valid_accuracies['best']:
            valid_accuracies['best'] = valid_a_top1
            genotypes['best'] = search_model.genotype()
            find_best = True
        else:
            find_best = False
        genotypes[epoch] = search_model.genotype()
        logger.log('<<<--->>> The {:}-th epoch : {:}'.format(
            epoch_str, genotypes[epoch]))
        # save checkpoint
        save_path = save_checkpoint(
            {
                'epoch': epoch + 1,
                'args': deepcopy(xargs),
                'search_model': search_model.state_dict(),
                'w_optimizer': w_optimizer.state_dict(),
                'a_optimizer': a_optimizer.state_dict(),
                'w_scheduler': w_scheduler.state_dict(),
                'genotypes': genotypes,
                'valid_accuracies': valid_accuracies
            }, model_base_path, logger)
        # NOTE(review): this info checkpoint stores the module-level `args`
        # while the one above stores `xargs` -- kept as-is; confirm.
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'args': deepcopy(args),
                'last_checkpoint': save_path,
            }, logger.path('info'), logger)
        if xargs.snapshoot > 0 and epoch % xargs.snapshoot == 0:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'args': deepcopy(args),
                    'search_model': search_model.state_dict(),
                },
                os.path.join(str(logger.model_dir),
                             f"checkpoint_epoch{epoch}.pth"), logger)
        if find_best:
            logger.log(
                '<<<--->>> The {:}-th epoch : find the highest validation accuracy : {:.2f}%.'
                .format(epoch_str, valid_a_top1))
            copy_checkpoint(model_base_path, model_best_path, logger)
        with torch.no_grad():
            logger.log('{:}'.format(search_model.show_alphas()))
        if api is not None:
            # Query the benchmark for the ground-truth accuracy of the
            # current genotype and log it alongside the search accuracy.
            logger.log('{:}'.format(api.query_by_arch(genotypes[epoch])))
            index = api.query_index_by_arch(genotypes[epoch])
            info = api.query_meta_info_by_index(
                index)  # This is an instance of `ArchResults`
            res_metrics = info.get_metrics(
                f'{xargs.dataset}',
                'ori-test')  # This is a dict with metric names as keys
            # cost_metrics = info.get_comput_costs('cifar10')
            writer.add_scalar(f'{xargs.dataset}_ground_acc_ori-test',
                              res_metrics['accuracy'], epoch)
            writer.add_scalar(f'{xargs.dataset}_search_acc', valid_a_top1,
                              epoch)
            if xargs.dataset.lower() != 'cifar10':
                writer.add_scalar(
                    f'{xargs.dataset}_ground_acc_x-test',
                    info.get_metrics(f'{xargs.dataset}',
                                     'x-test')['accuracy'], epoch)
            if find_best:
                valid_accuracies['best_gt'] = res_metrics['accuracy']
            writer.add_scalar(f"{xargs.dataset}_cur_best_gt_acc_ori-test",
                              valid_accuracies['best_gt'], epoch)
        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
    logger.log('\n' + '-' * 100)
    logger.log('{:} : run {:} epochs, cost {:.1f} s, last-geno is {:}.'.format(
        args.model, xargs.early_stop_epoch, search_time.sum,
        genotypes[xargs.early_stop_epoch - 1]))
    if api is not None:
        logger.log('{:}'.format(
            api.query_by_arch(genotypes[xargs.early_stop_epoch - 1])))
    logger.close()
def main(xargs):
    """SETN search on NAS-Bench-201: one-shot training plus arch sampling.

    Trains a SETN super-network (weights via scheduler-driven optimizer,
    alphas via Adam), each epoch selects the best architecture from
    ``xargs.select_num`` candidates with ``get_best_arch``, evaluates it in
    'dynamic' mode, checkpoints state, and finally reports the selected
    genotype together with its benchmark (200-epoch) statistics.
    """
    assert torch.cuda.is_available(), "CUDA is not available."
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    # Deterministic cuDNN for reproducible searches.
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(xargs.workers)
    prepare_seed(xargs.rand_seed)
    # NOTE(review): reads the module-level `args`, not `xargs` -- confirm.
    logger = prepare_logger(args)
    train_data, valid_data, xshape, class_num = get_datasets(
        xargs.dataset, xargs.data_path, -1)
    config = load_config(xargs.config_path, {
        "class_num": class_num,
        "xshape": xshape
    }, logger)
    search_loader, _, valid_loader = get_nas_search_loaders(
        train_data,
        valid_data,
        xargs.dataset,
        "configs/nas-benchmark/",
        (config.batch_size, config.test_batch_size),
        xargs.workers,
    )
    logger.log(
        "||||||| {:10s} ||||||| Search-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}"
        .format(xargs.dataset, len(search_loader), len(valid_loader),
                config.batch_size))
    logger.log("||||||| {:10s} ||||||| Config={:}".format(
        xargs.dataset, config))
    search_space = get_search_spaces("cell", xargs.search_space_name)
    # Model config from CLI flags or from an external config file.
    if xargs.model_config is None:
        model_config = dict2config(
            dict(
                name="SETN",
                C=xargs.channel,
                N=xargs.num_cells,
                max_nodes=xargs.max_nodes,
                num_classes=class_num,
                space=search_space,
                affine=False,
                track_running_stats=bool(xargs.track_running_stats),
            ),
            None,
        )
    else:
        model_config = load_config(
            xargs.model_config,
            dict(
                num_classes=class_num,
                space=search_space,
                affine=False,
                track_running_stats=bool(xargs.track_running_stats),
            ),
            None,
        )
    logger.log("search space : {:}".format(search_space))
    search_model = get_cell_based_tiny_net(model_config)
    # Weight optimizer/scheduler from config; Adam for the alphas.
    w_optimizer, w_scheduler, criterion = get_optim_scheduler(
        search_model.get_weights(), config)
    a_optimizer = torch.optim.Adam(
        search_model.get_alphas(),
        lr=xargs.arch_learning_rate,
        betas=(0.5, 0.999),
        weight_decay=xargs.arch_weight_decay,
    )
    logger.log("w-optimizer : {:}".format(w_optimizer))
    logger.log("a-optimizer : {:}".format(a_optimizer))
    logger.log("w-scheduler : {:}".format(w_scheduler))
    logger.log("criterion   : {:}".format(criterion))
    flop, param = get_model_infos(search_model, xshape)
    logger.log("FLOP = {:.2f} M, Params = {:.2f} MB".format(flop, param))
    logger.log("search-space : {:}".format(search_space))
    if xargs.arch_nas_dataset is None:
        api = None
    else:
        api = API(xargs.arch_nas_dataset)
    logger.log("{:} create API = {:} done".format(time_string(), api))
    last_info, model_base_path, model_best_path = (
        logger.path("info"),
        logger.path("model"),
        logger.path("best"),
    )
    network, criterion = torch.nn.DataParallel(
        search_model).cuda(), criterion.cuda()
    if last_info.exists():  # automatically resume from previous checkpoint
        logger.log("=> loading checkpoint of the last-info '{:}' start".format(
            last_info))
        last_info = torch.load(last_info)
        start_epoch = last_info["epoch"]
        checkpoint = torch.load(last_info["last_checkpoint"])
        genotypes = checkpoint["genotypes"]
        valid_accuracies = checkpoint["valid_accuracies"]
        search_model.load_state_dict(checkpoint["search_model"])
        w_scheduler.load_state_dict(checkpoint["w_scheduler"])
        w_optimizer.load_state_dict(checkpoint["w_optimizer"])
        a_optimizer.load_state_dict(checkpoint["a_optimizer"])
        logger.log(
            "=> loading checkpoint of the last-info '{:}' start with {:}-th epoch."
            .format(last_info, start_epoch))
    else:
        logger.log("=> do not find the last-info file : {:}".format(last_info))
        # Seed the genotype history with an architecture selected from the
        # untrained super-network.
        init_genotype, _ = get_best_arch(valid_loader, network,
                                         xargs.select_num)
        start_epoch, valid_accuracies, genotypes = 0, {
            "best": -1
        }, {
            -1: init_genotype
        }
    # start training
    start_time, search_time, epoch_time, total_epoch = (
        time.time(),
        AverageMeter(),
        AverageMeter(),
        config.epochs + config.warmup,
    )
    for epoch in range(start_epoch, total_epoch):
        w_scheduler.update(epoch, 0.0)
        need_time = "Time Left: {:}".format(
            convert_secs2time(epoch_time.val * (total_epoch - epoch), True))
        epoch_str = "{:03d}-{:03d}".format(epoch, total_epoch)
        logger.log("\n[Search the {:}-th epoch] {:}, LR={:}".format(
            epoch_str, need_time, min(w_scheduler.get_lr())))
        # One epoch of alternating weight/architecture updates.
        (
            search_w_loss,
            search_w_top1,
            search_w_top5,
            search_a_loss,
            search_a_top1,
            search_a_top5,
        ) = search_func(
            search_loader,
            network,
            criterion,
            w_scheduler,
            w_optimizer,
            a_optimizer,
            epoch_str,
            xargs.print_freq,
            logger,
        )
        search_time.update(time.time() - start_time)
        logger.log(
            "[{:}] search [base] : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%, time-cost={:.1f} s"
            .format(epoch_str, search_w_loss, search_w_top1, search_w_top5,
                    search_time.sum))
        logger.log(
            "[{:}] search [arch] : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%"
            .format(epoch_str, search_a_loss, search_a_top1, search_a_top5))
        # Select the best candidate architecture and evaluate it with the
        # super-network fixed to that architecture ('dynamic' mode).
        genotype, temp_accuracy = get_best_arch(valid_loader, network,
                                                xargs.select_num)
        network.module.set_cal_mode("dynamic", genotype)
        valid_a_loss, valid_a_top1, valid_a_top5 = valid_func(
            valid_loader, network, criterion)
        logger.log(
            "[{:}] evaluate : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}% | {:}"
            .format(epoch_str, valid_a_loss, valid_a_top1, valid_a_top5,
                    genotype))
        # search_model.set_cal_mode('urs')
        # valid_a_loss , valid_a_top1 , valid_a_top5 = valid_func(valid_loader, network, criterion)
        # logger.log('[{:}] URS---evaluate : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%'.format(epoch_str, valid_a_loss, valid_a_top1, valid_a_top5))
        # search_model.set_cal_mode('joint')
        # valid_a_loss , valid_a_top1 , valid_a_top5 = valid_func(valid_loader, network, criterion)
        # logger.log('[{:}] JOINT-evaluate : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%'.format(epoch_str, valid_a_loss, valid_a_top1, valid_a_top5))
        # search_model.set_cal_mode('select')
        # valid_a_loss , valid_a_top1 , valid_a_top5 = valid_func(valid_loader, network, criterion)
        # logger.log('[{:}] Selec-evaluate : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%'.format(epoch_str, valid_a_loss, valid_a_top1, valid_a_top5))
        # check the best accuracy
        valid_accuracies[epoch] = valid_a_top1
        genotypes[epoch] = genotype
        logger.log("<<<--->>> The {:}-th epoch : {:}".format(
            epoch_str, genotypes[epoch]))
        # save checkpoint
        save_path = save_checkpoint(
            {
                "epoch": epoch + 1,
                "args": deepcopy(xargs),
                "search_model": search_model.state_dict(),
                "w_optimizer": w_optimizer.state_dict(),
                "a_optimizer": a_optimizer.state_dict(),
                "w_scheduler": w_scheduler.state_dict(),
                "genotypes": genotypes,
                "valid_accuracies": valid_accuracies,
            },
            model_base_path,
            logger,
        )
        # NOTE(review): stores the module-level `args` here (vs `xargs`
        # above) -- kept as-is; confirm.
        last_info = save_checkpoint(
            {
                "epoch": epoch + 1,
                "args": deepcopy(args),
                "last_checkpoint": save_path,
            },
            logger.path("info"),
            logger,
        )
        with torch.no_grad():
            logger.log("{:}".format(search_model.show_alphas()))
        if api is not None:
            logger.log("{:}".format(api.query_by_arch(genotypes[epoch],
                                                      "200")))
        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
    # the final post procedure : count the time
    start_time = time.time()
    genotype, temp_accuracy = get_best_arch(valid_loader, network,
                                            xargs.select_num)
    search_time.update(time.time() - start_time)
    network.module.set_cal_mode("dynamic", genotype)
    valid_a_loss, valid_a_top1, valid_a_top5 = valid_func(
        valid_loader, network, criterion)
    # NOTE(review): "gentotype" typo below is in a runtime log string and is
    # deliberately left unchanged by this doc-only update.
    logger.log(
        "Last : the gentotype is : {:}, with the validation accuracy of {:.3f}%."
        .format(genotype, valid_a_top1))
    logger.log("\n" + "-" * 100)
    # check the performance from the architecture dataset
    logger.log(
        "SETN : run {:} epochs, cost {:.1f} s, last-geno is {:}.".format(
            total_epoch, search_time.sum, genotype))
    if api is not None:
        logger.log("{:}".format(api.query_by_arch(genotype, "200")))
    logger.close()
def main(xargs):
    """Run a weight-sharing NAS search (SETN/ENAS/GDAS/DARTS/random via ``xargs.algo``).

    Builds the dataset/search loaders, the one-shot super-network and its two
    optimizers (weights + architecture parameters), optionally resumes from the
    last checkpoint, runs the search loop, and finally derives and evaluates the
    best architecture.  Results and checkpoints are written through ``logger``.

    Args:
        xargs: parsed argparse namespace with all search hyper-parameters
            (dataset, paths, channel/cell counts, algorithm name, etc.).
    """
    assert torch.cuda.is_available(), 'CUDA is not available.'
    # Deterministic cuDNN so repeated runs with the same seed are comparable.
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(xargs.workers)
    prepare_seed(xargs.rand_seed)
    # FIX: was prepare_logger(args) — relied on a module-level global instead of
    # the function's own parameter.
    logger = prepare_logger(xargs)

    train_data, valid_data, xshape, class_num = get_datasets(
        xargs.dataset, xargs.data_path, -1)
    # NOTE: 'overwite_epochs' (sic) matches the CLI flag name, so it is kept.
    if xargs.overwite_epochs is None:
        extra_info = {'class_num': class_num, 'xshape': xshape}
    else:
        extra_info = {
            'class_num': class_num,
            'xshape': xshape,
            'epochs': xargs.overwite_epochs
        }
    config = load_config(xargs.config_path, extra_info, logger)
    search_loader, train_loader, valid_loader = get_nas_search_loaders(
        train_data, valid_data, xargs.dataset, 'configs/nas-benchmark/',
        (config.batch_size, config.test_batch_size), xargs.workers)
    logger.log(
        '||||||| {:10s} ||||||| Search-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}'
        .format(xargs.dataset, len(search_loader), len(valid_loader),
                config.batch_size))
    logger.log('||||||| {:10s} ||||||| Config={:}'.format(
        xargs.dataset, config))

    # Build the one-shot super-network covering the whole search space.
    search_space = get_search_spaces(xargs.search_space, 'nas-bench-301')
    model_config = dict2config(
        dict(name='generic',
             C=xargs.channel,
             N=xargs.num_cells,
             max_nodes=xargs.max_nodes,
             num_classes=class_num,
             space=search_space,
             affine=bool(xargs.affine),
             track_running_stats=bool(xargs.track_running_stats)), None)
    logger.log('search space : {:}'.format(search_space))
    logger.log('model config : {:}'.format(model_config))
    search_model = get_cell_based_tiny_net(model_config)
    search_model.set_algo(xargs.algo)
    logger.log('{:}'.format(search_model))

    # Two optimizers: SGD-style (from config) for the network weights, Adam for
    # the architecture parameters (alphas), as in the DARTS family of methods.
    w_optimizer, w_scheduler, criterion = get_optim_scheduler(
        search_model.weights, config)
    a_optimizer = torch.optim.Adam(search_model.alphas,
                                   lr=xargs.arch_learning_rate,
                                   betas=(0.5, 0.999),
                                   weight_decay=xargs.arch_weight_decay,
                                   eps=xargs.arch_eps)
    logger.log('w-optimizer : {:}'.format(w_optimizer))
    logger.log('a-optimizer : {:}'.format(a_optimizer))
    logger.log('w-scheduler : {:}'.format(w_scheduler))
    logger.log('criterion   : {:}'.format(criterion))
    params = count_parameters_in_MB(search_model)
    logger.log('The parameters of the search model = {:.2f} MB'.format(params))
    logger.log('search-space : {:}'.format(search_space))
    # The benchmark API lets us look up the ground-truth accuracy of a genotype.
    if bool(xargs.use_api):
        api = create(None, 'topology', fast_mode=True, verbose=False)
    else:
        api = None
    logger.log('{:} create API = {:} done'.format(time_string(), api))

    last_info, model_base_path, model_best_path = logger.path(
        'info'), logger.path('model'), logger.path('best')
    network, criterion = search_model.cuda(), criterion.cuda(
    )  # use a single GPU
    # FIX: removed a duplicated re-assignment of last_info/model paths that
    # appeared twice verbatim in the original.

    if last_info.exists():  # automatically resume from previous checkpoint
        logger.log("=> loading checkpoint of the last-info '{:}' start".format(
            last_info))
        last_info = torch.load(last_info)
        start_epoch = last_info['epoch']
        checkpoint = torch.load(last_info['last_checkpoint'])
        genotypes = checkpoint['genotypes']
        baseline = checkpoint['baseline']
        valid_accuracies = checkpoint['valid_accuracies']
        search_model.load_state_dict(checkpoint['search_model'])
        w_scheduler.load_state_dict(checkpoint['w_scheduler'])
        w_optimizer.load_state_dict(checkpoint['w_optimizer'])
        a_optimizer.load_state_dict(checkpoint['a_optimizer'])
        logger.log(
            "=> loading checkpoint of the last-info '{:}' start with {:}-th epoch."
            .format(last_info, start_epoch))
    else:
        logger.log("=> do not find the last-info file : {:}".format(last_info))
        # Seed the genotype dict with one sampled architecture at pseudo-epoch -1.
        start_epoch, valid_accuracies, genotypes = 0, {
            'best': -1
        }, {
            -1: network.return_topK(1, True)[0]
        }
        baseline = None  # ENAS reward baseline; populated by train_controller

    # start training
    start_time, search_time, epoch_time, total_epoch = time.time(
    ), AverageMeter(), AverageMeter(), config.epochs + config.warmup
    for epoch in range(start_epoch, total_epoch):
        w_scheduler.update(epoch, 0.0)
        need_time = 'Time Left: {:}'.format(
            convert_secs2time(epoch_time.val * (total_epoch - epoch), True))
        epoch_str = '{:03d}-{:03d}'.format(epoch, total_epoch)
        logger.log('\n[Search the {:}-th epoch] {:}, LR={:}'.format(
            epoch_str, need_time, min(w_scheduler.get_lr())))

        # Drop-path probability is annealed linearly over the run.
        network.set_drop_path(
            float(epoch + 1) / total_epoch, xargs.drop_path_rate)
        if xargs.algo == 'gdas':
            # Gumbel-softmax temperature annealed from tau_max down to tau_min.
            network.set_tau(xargs.tau_max - (xargs.tau_max - xargs.tau_min) *
                            epoch / (total_epoch - 1))
            logger.log('[RESET tau as : {:} and drop_path as {:}]'.format(
                network.tau, network.drop_path))
        search_w_loss, search_w_top1, search_w_top5, search_a_loss, search_a_top1, search_a_top5 \
            = search_func(search_loader, network, criterion, w_scheduler,
                          w_optimizer, a_optimizer, epoch_str, xargs.print_freq,
                          xargs.algo, logger)
        search_time.update(time.time() - start_time)
        logger.log(
            '[{:}] search [base] : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%, time-cost={:.1f} s'
            .format(epoch_str, search_w_loss, search_w_top1, search_w_top5,
                    search_time.sum))
        logger.log(
            '[{:}] search [arch] : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%'
            .format(epoch_str, search_a_loss, search_a_top1, search_a_top5))
        if xargs.algo == 'enas':
            # ENAS updates its controller with the validation reward.
            ctl_loss, ctl_acc, baseline, ctl_reward \
                = train_controller(valid_loader, network, criterion, a_optimizer,
                                   baseline, epoch_str, xargs.print_freq, logger)
            logger.log(
                '[{:}] controller : loss={:}, acc={:}, baseline={:}, reward={:}'
                .format(epoch_str, ctl_loss, ctl_acc, baseline, ctl_reward))

        genotype, temp_accuracy = get_best_arch(valid_loader, network,
                                                xargs.eval_candidate_num,
                                                xargs.algo)
        # Put the super-network into the evaluation mode matching the algorithm.
        if xargs.algo == 'setn' or xargs.algo == 'enas':
            network.set_cal_mode('dynamic', genotype)
        elif xargs.algo == 'gdas':
            network.set_cal_mode('gdas', None)
        elif xargs.algo.startswith('darts'):
            network.set_cal_mode('joint', None)
        elif xargs.algo == 'random':
            network.set_cal_mode('urs', None)
        else:
            raise ValueError('Invalid algorithm name : {:}'.format(xargs.algo))
        logger.log('[{:}] - [get_best_arch] : {:} -> {:}'.format(
            epoch_str, genotype, temp_accuracy))
        valid_a_loss, valid_a_top1, valid_a_top5 = valid_func(
            valid_loader, network, criterion, xargs.algo, logger)
        logger.log(
            '[{:}] evaluate : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}% | {:}'
            .format(epoch_str, valid_a_loss, valid_a_top1, valid_a_top5,
                    genotype))
        valid_accuracies[epoch] = valid_a_top1
        genotypes[epoch] = genotype
        logger.log('<<<--->>> The {:}-th epoch : {:}'.format(
            epoch_str, genotypes[epoch]))
        # save checkpoint
        save_path = save_checkpoint(
            {
                'epoch': epoch + 1,
                'args': deepcopy(xargs),
                'baseline': baseline,
                'search_model': search_model.state_dict(),
                'w_optimizer': w_optimizer.state_dict(),
                'a_optimizer': a_optimizer.state_dict(),
                'w_scheduler': w_scheduler.state_dict(),
                'genotypes': genotypes,
                'valid_accuracies': valid_accuracies
            }, model_base_path, logger)
        last_info = save_checkpoint(
            {
                'epoch': epoch + 1,
                # FIX: was deepcopy(args) (module global) — use the parameter,
                # consistent with the checkpoint saved just above.
                'args': deepcopy(xargs),
                'last_checkpoint': save_path,
            }, logger.path('info'), logger)
        with torch.no_grad():
            logger.log('{:}'.format(search_model.show_alphas()))
        if api is not None:
            logger.log('{:}'.format(api.query_by_arch(genotypes[epoch], '200')))
        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    # the final post procedure : count the time
    start_time = time.time()
    genotype, temp_accuracy = get_best_arch(valid_loader, network,
                                            xargs.eval_candidate_num,
                                            xargs.algo)
    if xargs.algo == 'setn' or xargs.algo == 'enas':
        network.set_cal_mode('dynamic', genotype)
    elif xargs.algo == 'gdas':
        network.set_cal_mode('gdas', None)
    elif xargs.algo.startswith('darts'):
        network.set_cal_mode('joint', None)
    elif xargs.algo == 'random':
        network.set_cal_mode('urs', None)
    else:
        raise ValueError('Invalid algorithm name : {:}'.format(xargs.algo))
    search_time.update(time.time() - start_time)
    valid_a_loss, valid_a_top1, valid_a_top5 = valid_func(
        valid_loader, network, criterion, xargs.algo, logger)
    logger.log(
        'Last : the gentotype is : {:}, with the validation accuracy of {:.3f}%.'
        .format(genotype, valid_a_top1))

    logger.log('\n' + '-' * 100)
    # check the performance from the architecture dataset
    logger.log('[{:}] run {:} epochs, cost {:.1f} s, last-geno is {:}.'.format(
        xargs.algo, total_epoch, search_time.sum, genotype))
    if api is not None:
        logger.log('{:}'.format(api.query_by_arch(genotype, '200')))
    logger.close()
def setup():
    """Load the main application settings into the module-global ``app``.

    The settings file and its spec are resolved through the ``paths`` helpers;
    parsing is delegated to ``config_utils.load_config``.
    """
    global app
    log.debug("Loading global app settings...")
    settings_file = paths.config_path(MAINFILE)
    spec_file = paths.app_path(MAINSPEC)
    app = config_utils.load_config(settings_file, spec_file)
def setup():
    """Populate the module-global ``appconfig`` from the main config file.

    ``MAINFILE`` is parsed and validated against ``MAINSPEC`` by
    ``config_utils.load_config``.
    """
    global appconfig
    loaded = config_utils.load_config(MAINFILE, MAINSPEC)
    appconfig = loaded
def main(xargs):
    """Run a GDAS architecture search on NAS-Bench-201.

    Builds the search/valid loaders and the GDAS super-network, optionally
    resumes from the last checkpoint, trains weights and architecture
    parameters jointly with an annealed Gumbel-softmax temperature, tracks the
    best genotype by validation accuracy, and checkpoints every epoch.

    Args:
        xargs: parsed argparse namespace with all search hyper-parameters.
    """
    assert torch.cuda.is_available(), 'CUDA is not available.'
    # Deterministic cuDNN so repeated runs with the same seed are comparable.
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(xargs.workers)
    prepare_seed(xargs.rand_seed)
    # FIX: was prepare_logger(args) — relied on a module-level global instead of
    # the function's own parameter.
    logger = prepare_logger(xargs)

    train_data, valid_data, xshape, class_num = get_datasets(
        xargs.dataset, xargs.data_path, -1)
    #config_path = 'configs/nas-benchmark/algos/GDAS.config'
    config = load_config(xargs.config_path, {
        'class_num': class_num,
        'xshape': xshape
    }, logger)
    search_loader, _, valid_loader = get_nas_search_loaders(
        train_data, valid_data, xargs.dataset, 'configs/nas-benchmark/',
        config.batch_size, xargs.workers)
    logger.log(
        '||||||| {:10s} ||||||| Search-Loader-Num={:}, batch size={:}'.format(
            xargs.dataset, len(search_loader), config.batch_size))
    logger.log('||||||| {:10s} ||||||| Config={:}'.format(
        xargs.dataset, config))

    # Build the GDAS super-network, either from CLI args or a model config file.
    search_space = get_search_spaces('cell', xargs.search_space_name)
    if xargs.model_config is None:
        model_config = dict2config(
            {
                'name': 'GDAS',
                'C': xargs.channel,
                'N': xargs.num_cells,
                'max_nodes': xargs.max_nodes,
                'num_classes': class_num,
                'space': search_space,
                'affine': False,
                'track_running_stats': bool(xargs.track_running_stats)
            }, None)
    else:
        model_config = load_config(
            xargs.model_config, {
                'num_classes': class_num,
                'space': search_space,
                'affine': False,
                'track_running_stats': bool(xargs.track_running_stats)
            }, None)
    search_model = get_cell_based_tiny_net(model_config)
    logger.log('search-model :\n{:}'.format(search_model))
    logger.log('model-config : {:}'.format(model_config))

    # Two optimizers: scheduler-driven one for weights, Adam for the alphas.
    w_optimizer, w_scheduler, criterion = get_optim_scheduler(
        search_model.get_weights(), config)
    a_optimizer = torch.optim.Adam(search_model.get_alphas(),
                                   lr=xargs.arch_learning_rate,
                                   betas=(0.5, 0.999),
                                   weight_decay=xargs.arch_weight_decay)
    logger.log('w-optimizer : {:}'.format(w_optimizer))
    logger.log('a-optimizer : {:}'.format(a_optimizer))
    logger.log('w-scheduler : {:}'.format(w_scheduler))
    logger.log('criterion : {:}'.format(criterion))
    flop, param = get_model_infos(search_model, xshape)
    logger.log('FLOP = {:.2f} M, Params = {:.2f} MB'.format(flop, param))
    logger.log('search-space [{:} ops] : {:}'.format(len(search_space),
                                                     search_space))
    # Optional NAS-Bench-201 API for looking up ground-truth accuracies.
    if xargs.arch_nas_dataset is None:
        api = None
    else:
        api = API(xargs.arch_nas_dataset)
    logger.log('{:} create API = {:} done'.format(time_string(), api))

    last_info, model_base_path, model_best_path = logger.path(
        'info'), logger.path('model'), logger.path('best')
    network, criterion = torch.nn.DataParallel(
        search_model).cuda(), criterion.cuda()

    if last_info.exists():  # automatically resume from previous checkpoint
        logger.log("=> loading checkpoint of the last-info '{:}' start".format(
            last_info))
        last_info = torch.load(last_info)
        start_epoch = last_info['epoch']
        checkpoint = torch.load(last_info['last_checkpoint'])
        genotypes = checkpoint['genotypes']
        valid_accuracies = checkpoint['valid_accuracies']
        search_model.load_state_dict(checkpoint['search_model'])
        w_scheduler.load_state_dict(checkpoint['w_scheduler'])
        w_optimizer.load_state_dict(checkpoint['w_optimizer'])
        a_optimizer.load_state_dict(checkpoint['a_optimizer'])
        logger.log(
            "=> loading checkpoint of the last-info '{:}' start with {:}-th epoch."
            .format(last_info, start_epoch))
    else:
        logger.log("=> do not find the last-info file : {:}".format(last_info))
        start_epoch, valid_accuracies, genotypes = 0, {
            'best': -1
        }, {
            -1: search_model.genotype()
        }

    # start training
    start_time, search_time, epoch_time, total_epoch = time.time(
    ), AverageMeter(), AverageMeter(), config.epochs + config.warmup
    for epoch in range(start_epoch, total_epoch):
        w_scheduler.update(epoch, 0.0)
        need_time = 'Time Left: {:}'.format(
            convert_secs2time(epoch_time.val * (total_epoch - epoch), True))
        epoch_str = '{:03d}-{:03d}'.format(epoch, total_epoch)
        # Gumbel-softmax temperature annealed linearly from tau_max to tau_min.
        search_model.set_tau(xargs.tau_max - (xargs.tau_max - xargs.tau_min) *
                             epoch / (total_epoch - 1))
        logger.log('\n[Search the {:}-th epoch] {:}, tau={:}, LR={:}'.format(
            epoch_str, need_time, search_model.get_tau(),
            min(w_scheduler.get_lr())))

        search_w_loss, search_w_top1, search_w_top5, valid_a_loss , valid_a_top1 , valid_a_top5 \
            = search_func(search_loader, network, criterion, w_scheduler,
                          w_optimizer, a_optimizer, epoch_str, xargs.print_freq,
                          logger)
        search_time.update(time.time() - start_time)
        logger.log(
            '[{:}] searching : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%, time-cost={:.1f} s'
            .format(epoch_str, search_w_loss, search_w_top1, search_w_top5,
                    search_time.sum))
        logger.log(
            '[{:}] evaluate : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%'
            .format(epoch_str, valid_a_loss, valid_a_top1, valid_a_top5))
        # check the best accuracy
        valid_accuracies[epoch] = valid_a_top1
        if valid_a_top1 > valid_accuracies['best']:
            valid_accuracies['best'] = valid_a_top1
            genotypes['best'] = search_model.genotype()
            find_best = True
        else:
            find_best = False

        genotypes[epoch] = search_model.genotype()
        logger.log('<<<--->>> The {:}-th epoch : {:}'.format(
            epoch_str, genotypes[epoch]))
        # save checkpoint
        save_path = save_checkpoint(
            {
                'epoch': epoch + 1,
                'args': deepcopy(xargs),
                'search_model': search_model.state_dict(),
                'w_optimizer': w_optimizer.state_dict(),
                'a_optimizer': a_optimizer.state_dict(),
                'w_scheduler': w_scheduler.state_dict(),
                'genotypes': genotypes,
                'valid_accuracies': valid_accuracies
            }, model_base_path, logger)
        last_info = save_checkpoint(
            {
                'epoch': epoch + 1,
                # FIX: was deepcopy(args) (module global) — use the parameter,
                # consistent with the checkpoint saved just above.
                'args': deepcopy(xargs),
                'last_checkpoint': save_path,
            }, logger.path('info'), logger)
        if find_best:
            logger.log(
                '<<<--->>> The {:}-th epoch : find the highest validation accuracy : {:.2f}%.'
                .format(epoch_str, valid_a_top1))
            copy_checkpoint(model_base_path, model_best_path, logger)
        with torch.no_grad():
            logger.log('{:}'.format(search_model.show_alphas()))
        if api is not None:
            logger.log('{:}'.format(api.query_by_arch(genotypes[epoch], '200')))
        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    logger.log('\n' + '-' * 100)
    # check the performance from the architecture dataset
    logger.log(
        'GDAS : run {:} epochs, cost {:.1f} s, last-geno is {:}.'.format(
            total_epoch, search_time.sum, genotypes[total_epoch - 1]))
    if api is not None:
        logger.log('{:}'.format(
            api.query_by_arch(genotypes[total_epoch - 1], '200')))
    logger.close()
def get_nas_bench_loaders(workers):
    """Create all DataLoaders used by the NAS benchmark for the three datasets.

    For CIFAR-10, CIFAR-100 and ImageNet-16-120, loads the per-dataset batch
    config and the fixed train/valid(/test) index splits shipped with the repo,
    then builds the corresponding DataLoaders.  The split assertions guard
    against loading stale or mismatched split files.

    Args:
        workers (int): number of worker processes per DataLoader.

    Returns:
        dict: maps '<dataset>@<split>' keys (e.g. 'cifar10@train') to
        ``torch.utils.data.DataLoader`` instances.
    """
    torch.set_num_threads(workers)
    # Repo root, resolved relative to this file so cwd does not matter.
    root_dir = (pathlib.Path(__file__).parent / '..' / '..').resolve()
    # NOTE(review): raises KeyError if TORCH_HOME is unset — intentional fail-fast?
    torch_dir = pathlib.Path(os.environ['TORCH_HOME'])
    # cifar
    cifar_config_path = root_dir / 'configs' / 'nas-benchmark' / 'CIFAR.config'
    cifar_config = load_config(cifar_config_path, None, None)
    get_datasets = datasets.get_datasets  # a function to return the dataset
    break_line = '-' * 150
    print('{:} Create data-loader for all datasets'.format(time_string()))
    print(break_line)
    TRAIN_CIFAR10, VALID_CIFAR10, xshape, class_num = get_datasets(
        'cifar10', str(torch_dir / 'cifar.python'), -1)
    print(
        'original CIFAR-10 : {:} training images and {:} test images : {:} input shape : {:} number of classes'
        .format(len(TRAIN_CIFAR10), len(VALID_CIFAR10), xshape, class_num))
    cifar10_splits = load_config(
        root_dir / 'configs' / 'nas-benchmark' / 'cifar-split.txt', None, None)
    # Sanity-check the first indices of the canonical split file.
    assert cifar10_splits.train[:10] == [
        0, 5, 7, 11, 13, 15, 16, 17, 20, 24
    ] and cifar10_splits.valid[:10] == [1, 2, 3, 4, 6, 8, 9, 10, 12, 14]
    # The valid split draws from the train set but must use eval transforms.
    temp_dataset = copy.deepcopy(TRAIN_CIFAR10)
    temp_dataset.transform = VALID_CIFAR10.transform
    # data loader
    trainval_cifar10_loader = torch.utils.data.DataLoader(
        TRAIN_CIFAR10,
        batch_size=cifar_config.batch_size,
        shuffle=True,
        num_workers=workers,
        pin_memory=True)
    train_cifar10_loader = torch.utils.data.DataLoader(
        TRAIN_CIFAR10,
        batch_size=cifar_config.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            cifar10_splits.train),
        num_workers=workers,
        pin_memory=True)
    valid_cifar10_loader = torch.utils.data.DataLoader(
        temp_dataset,
        batch_size=cifar_config.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            cifar10_splits.valid),
        num_workers=workers,
        pin_memory=True)
    test__cifar10_loader = torch.utils.data.DataLoader(
        VALID_CIFAR10,
        batch_size=cifar_config.batch_size,
        shuffle=False,
        num_workers=workers,
        pin_memory=True)
    print('CIFAR-10 : trval-loader has {:3d} batch with {:} per batch'.format(
        len(trainval_cifar10_loader), cifar_config.batch_size))
    print('CIFAR-10 : train-loader has {:3d} batch with {:} per batch'.format(
        len(train_cifar10_loader), cifar_config.batch_size))
    print('CIFAR-10 : valid-loader has {:3d} batch with {:} per batch'.format(
        len(valid_cifar10_loader), cifar_config.batch_size))
    print('CIFAR-10 : test--loader has {:3d} batch with {:} per batch'.format(
        len(test__cifar10_loader), cifar_config.batch_size))
    print(break_line)
    # CIFAR-100
    TRAIN_CIFAR100, VALID_CIFAR100, xshape, class_num = get_datasets(
        'cifar100', str(torch_dir / 'cifar.python'), -1)
    print(
        'original CIFAR-100: {:} training images and {:} test images : {:} input shape : {:} number of classes'
        .format(len(TRAIN_CIFAR100), len(VALID_CIFAR100), xshape, class_num))
    cifar100_splits = load_config(
        root_dir / 'configs' / 'nas-benchmark' / 'cifar100-test-split.txt',
        None, None)
    assert cifar100_splits.xvalid[:10] == [
        1, 3, 4, 5, 8, 10, 13, 14, 15, 16
    ] and cifar100_splits.xtest[:10] == [0, 2, 6, 7, 9, 11, 12, 17, 20, 24]
    train_cifar100_loader = torch.utils.data.DataLoader(
        TRAIN_CIFAR100,
        batch_size=cifar_config.batch_size,
        shuffle=True,
        num_workers=workers,
        pin_memory=True)
    valid_cifar100_loader = torch.utils.data.DataLoader(
        VALID_CIFAR100,
        batch_size=cifar_config.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            cifar100_splits.xvalid),
        num_workers=workers,
        pin_memory=True)
    test__cifar100_loader = torch.utils.data.DataLoader(
        VALID_CIFAR100,
        batch_size=cifar_config.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            cifar100_splits.xtest),
        num_workers=workers,
        pin_memory=True)
    print('CIFAR-100 : train-loader has {:3d} batch'.format(
        len(train_cifar100_loader)))
    print('CIFAR-100 : valid-loader has {:3d} batch'.format(
        len(valid_cifar100_loader)))
    print('CIFAR-100 : test--loader has {:3d} batch'.format(
        len(test__cifar100_loader)))
    print(break_line)
    # ImageNet-16-120
    # FIX: was a bare relative string ('configs/nas-benchmark/ImageNet-16.config'),
    # which breaks when the process cwd is not the repo root; anchor at root_dir
    # like every other config path in this function.
    imagenet16_config_path = root_dir / 'configs' / 'nas-benchmark' / 'ImageNet-16.config'
    imagenet16_config = load_config(imagenet16_config_path, None, None)
    TRAIN_ImageNet16_120, VALID_ImageNet16_120, xshape, class_num = get_datasets(
        'ImageNet16-120', str(torch_dir / 'cifar.python' / 'ImageNet16'), -1)
    print(
        'original TRAIN_ImageNet16_120: {:} training images and {:} test images : {:} input shape : {:} number of classes'
        .format(len(TRAIN_ImageNet16_120), len(VALID_ImageNet16_120), xshape,
                class_num))
    imagenet_splits = load_config(
        root_dir / 'configs' / 'nas-benchmark' /
        'imagenet-16-120-test-split.txt', None, None)
    assert imagenet_splits.xvalid[:10] == [
        1, 2, 3, 6, 7, 8, 9, 12, 16, 18
    ] and imagenet_splits.xtest[:10] == [0, 4, 5, 10, 11, 13, 14, 15, 17, 20]
    train_imagenet_loader = torch.utils.data.DataLoader(
        TRAIN_ImageNet16_120,
        batch_size=imagenet16_config.batch_size,
        shuffle=True,
        num_workers=workers,
        pin_memory=True)
    valid_imagenet_loader = torch.utils.data.DataLoader(
        VALID_ImageNet16_120,
        batch_size=imagenet16_config.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            imagenet_splits.xvalid),
        num_workers=workers,
        pin_memory=True)
    test__imagenet_loader = torch.utils.data.DataLoader(
        VALID_ImageNet16_120,
        batch_size=imagenet16_config.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            imagenet_splits.xtest),
        num_workers=workers,
        pin_memory=True)
    print('ImageNet-16-120 : train-loader has {:3d} batch with {:} per batch'.
          format(len(train_imagenet_loader), imagenet16_config.batch_size))
    print('ImageNet-16-120 : valid-loader has {:3d} batch with {:} per batch'.
          format(len(valid_imagenet_loader), imagenet16_config.batch_size))
    print('ImageNet-16-120 : test--loader has {:3d} batch with {:} per batch'.
          format(len(test__imagenet_loader), imagenet16_config.batch_size))
    # 'cifar10', 'cifar100', 'ImageNet16-120'
    loaders = {
        'cifar10@trainval': trainval_cifar10_loader,
        'cifar10@train': train_cifar10_loader,
        'cifar10@valid': valid_cifar10_loader,
        'cifar10@test': test__cifar10_loader,
        'cifar100@train': train_cifar100_loader,
        'cifar100@valid': valid_cifar100_loader,
        'cifar100@test': test__cifar100_loader,
        'ImageNet16-120@train': train_imagenet_loader,
        'ImageNet16-120@valid': valid_imagenet_loader,
        'ImageNet16-120@test': test__imagenet_loader
    }
    return loaders
# Script entry point: parse CLI flags, load the saved model's config, pick the
# matching zp_datastream/zp_model implementation, and report the compute setup.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--prefix_path',
                        type=str,
                        required=True,
                        help='Prefix path to the saved model')
    parser.add_argument('--in_path',
                        type=str,
                        required=True,
                        help='Path to the input file.')
    parser.add_argument('--out_path',
                        type=str,
                        default=None,
                        help='Path to the output file.')
    # parse_known_args: unrecognized flags are collected in `unparsed`, not fatal.
    args, unparsed = parser.parse_known_args()

    # The model's hyper-parameters live next to its weights as '<prefix>.config.json'.
    FLAGS = config_utils.load_config(args.prefix_path + ".config.json")

    # Select the word- or char-level implementation to match the saved model;
    # the char variants are aliased so downstream code uses one set of names.
    if FLAGS.model_type == 'bert_word':
        import zp_datastream
        import zp_model
    elif FLAGS.model_type == 'bert_char':
        import zp_datastream_char as zp_datastream
        import zp_model_char as zp_model
    else:
        assert False, "model_type '{}' not supported".format(FLAGS.model_type)

    # Prefer GPU when available; n_gpu is informational here.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    print('device: {}, n_gpu: {}, grad_accum_steps: {}'.format(
        device, n_gpu, FLAGS.grad_accum_steps))
def setup():
    """Load the global app settings from the user data directory into ``app``.

    The settings file lives under ``storage.data_directory`` and is validated
    against the spec bundled with the application.
    """
    global app
    log.debug("Loading global app settings...")
    settings_path = os.path.join(storage.data_directory, MAINFILE)
    spec_path = os.path.join(paths.app_path(), MAINSPEC)
    app = config_utils.load_config(settings_path, spec_path)