def main():
    args.exp_path /= f'{args.gpu}_{time.strftime("%Y%m%d-%H%M%S")}'
    utils.create_exp_dir(Path(args.exp_path), scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(args.exp_path / 'log.txt')
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    if args.seed is None:
        raise Exception('designate seed.')

    np.random.seed(args.seed)
    cudnn.benchmark = True
    cudnn.enabled = True
    torch.manual_seed(args.seed)

    # ================================================
    # total, used = os.popen(
    #     'nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader'
    # ).read().split('\n')[args.gpu].split(',')
    # total = int(total)
    # used = int(used)
    # print('Total GPU mem:', total, 'used:', used)
    # try:
    #     block_mem = 0.85 * (total - used)
    #     print(block_mem)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     del x
    # except RuntimeError as err:
    #     print(err)
    #     block_mem = 0.8 * (total - used)
    #     print(block_mem)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     del x
    #
    # print('reuse mem now ...')
    # ================================================

    logging.info(f'GPU device = {args.gpu}')
    logging.info(f'args = {args}')

    criterion = nn.CrossEntropyLoss().to(device)

    setting = args.location
    model = Network(args.init_ch, 10, args.layers, criterion, setting)

    checkpoint = None
    previous_epochs = 0
    if args.checkpoint_path:
        checkpoint = torch.load(args.checkpoint_path)
        utils.load(model, checkpoint['state_dict'], False)
        previous_epochs = checkpoint['epoch']
        args.epochs -= previous_epochs
        if args.epochs <= 0:
            raise Exception('args.epochs is too small.')

    if use_DataParallel:
        print('use Data Parallel')
        model = nn.parallel.DataParallel(model)
        model = model.cuda()
        module = model.module
        torch.cuda.manual_seed_all(args.seed)
    else:
        model = model.to(device)
        module = model

    param_size = utils.count_parameters_in_MB(model)
    logging.info(f'param size = {param_size}MB')

    arch_and_attn_params = list(
        map(id,
            module.arch_and_attn_parameters()
            if use_DataParallel else model.arch_and_attn_parameters()))
    weight_params = filter(
        lambda p: id(p) not in arch_and_attn_params,
        module.parameters() if use_DataParallel else model.parameters())

    optimizer = optim.SGD(weight_params,
                          args.lr,
                          momentum=args.momentum,
                          weight_decay=args.wd)
    if checkpoint:
        optimizer.load_state_dict(checkpoint['optimizer'])

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)  # 50000
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))  # 25000

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=8)  # from 2

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True,
        num_workers=8)

    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs, eta_min=args.lr_min)
    if checkpoint:
        scheduler.load_state_dict(checkpoint['scheduler'])

    arch = Arch(model, criterion, args)
    if checkpoint:
        arch.optimizer.load_state_dict(checkpoint['arch_optimizer'])

    for epoch in tqdm(range(args.epochs), desc='Total Progress'):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info(f'\nEpoch: {epoch} lr: {lr}')

        gen = module.genotype()
        logging.info(f'Genotype: {gen}')

        print(F.softmax(module.alphas_normal, dim=-1))
        print(F.softmax(module.alphas_reduce, dim=-1))
        if module.betas_normal is not None:
            print(F.softmax(module.betas_normal, dim=-1))
            print(F.softmax(module.betas_reduce, dim=-1))
        if module.gammas_normal is not None:
            print(F.softmax(module.gammas_normal, dim=-1))
            print(F.softmax(module.gammas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, arch,
                                     criterion, optimizer, lr, epoch + 1)
        logging.info(f'train acc: {train_acc}')

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion, epoch + 1)
        logging.info(f'valid acc: {valid_acc}')

        utils.save(model, args.exp_path / 'search.pt')
        utils.save_checkpoint(
            {
                'epoch': epoch + 1 + previous_epochs,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'arch_optimizer': arch.optimizer.state_dict(),
                'scheduler': scheduler.state_dict()
            }, False, args.exp_path)

    gen = module.genotype()
    gen_path = args.exp_path / 'genotype.json'
    utils.save_genotype(gen, gen_path)
    logging.info(f'Result genotype: {gen}')
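# (Added for illustration, not part of the original file.) The train() function
# called in the loop above is not shown in this fragment; the sketch below is the
# usual DARTS-style inner loop it typically implements, assuming arch.step takes a
# training batch and a validation batch and updates the architecture parameters
# before the weight step. All signatures here are hypothetical; `device` and the
# torch imports are assumed to exist as in the rest of this file.
def train_sketch(train_queue, valid_queue, model, arch, criterion, optimizer, lr):
    model.train()
    valid_iter = iter(valid_queue)
    for x, target in train_queue:
        x, target = x.to(device), target.to(device)
        x_val, target_val = next(valid_iter)
        x_val, target_val = x_val.to(device), target_val.to(device)

        # architecture (alpha) step, driven by the validation batch
        arch.step(x, target, x_val, target_val, lr, optimizer)

        # weight step on the training batch
        optimizer.zero_grad()
        loss = criterion(model(x), target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        optimizer.step()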
        f.write(str(self.current_arch.loss()) + "\n")
        return

    def log_acc(self):
        f = open(self.test_acc_path, 'a+')
        f.write(str(self.current_arch.acc()) + "\n")
        return

    def log_best_acc(self):
        f = open(self.best_acc_path, 'a+')
        f.write(str(self.best.acc()) + "\n")
        return

    def log_best_loss(self):
        f = open(self.best_loss_path, 'a+')
        f.write(str(self.best.loss()) + "\n")
        return


if __name__ == '__main__':
    params1 = {'convs': 2, 'channels': [32, 64], 'weight_init': 0.1, 'fcs': 1,
               'lr': 0.0001, 'bias_init': 0.5, 'filters': [5, 5],
               'optimizer': tf.train.AdamOptimizer, 'mo': 0.09, 'fc_dim': [512]}
    params2 = {'convs': 2, 'channels': [32, 64], 'weight_init': 0.1, 'fcs': 1,
               'lr': 0.0001, 'bias_init': 0.5, 'filters': [5, 5],
               'optimizer': tf.train.AdamOptimizer, 'mo': 0.09, 'fc_dim': [512]}
    arch1 = Arch.make_arch(params1)
    arch2 = Arch.make_arch(params2)
    print SANN.hamming_dist(arch1, arch2)
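# (Added for illustration.) SANN.hamming_dist is not shown in this fragment; a
# minimal sketch of a per-hyperparameter Hamming distance, assuming each Arch
# exposes its hyperparameter dict as `arch.params` (a hypothetical attribute name).
def hamming_dist_sketch(arch1, arch2):
    keys = set(arch1.params) | set(arch2.params)
    # count hyperparameters whose values differ between the two architectures
    return sum(arch1.params.get(k) != arch2.params.get(k) for k in keys)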
def main():
    np.random.seed(args.seed)
    cudnn.benchmark = True
    cudnn.enabled = True
    torch.manual_seed(args.seed)

    # ================================================
    total, used = os.popen(
        'nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader'
    ).read().split('\n')[args.gpu].split(',')
    total = int(total)
    used = int(used)
    print('Total GPU mem:', total, 'used:', used)
    # try:
    #     block_mem = 0.85 * (total - used)
    #     print(block_mem)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     del x
    # except RuntimeError as err:
    #     print(err)
    #     block_mem = 0.8 * (total - used)
    #     print(block_mem)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     del x
    #
    # print('reuse mem now ...')
    # ================================================

    logging.info('GPU device = %d' % args.gpu)
    logging.info("args = %s", args)
    logging.info(f"seed = {args.seed}")

    criterion = nn.CrossEntropyLoss().to(device)
    model = Network(args.init_ch, 10, args.layers, criterion).to(device)

    logging.info("Total param size = %f MB", utils.count_parameters_in_MB(model))

    # this is the optimizer to optimize
    optimizer = optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.wd)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)  # 50000
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))  # 25000

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=0 if 'pydevd' in sys.modules else 4)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True,
        num_workers=0 if 'pydevd' in sys.modules else 4)

    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs), eta_min=args.lr_min)

    arch = Arch(model, args)

    lines = ['epoch\ttrain_acc\tval_acc']
    genotype = ''
    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('\nEpoch: %d lr: %e', epoch, lr)

        genotype = model.genotype()
        logging.info('Genotype: %s', genotype)

        # print(F.softmax(model.alphas_normal, dim=-1))
        # print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, arch, criterion, optimizer, lr)
        logging.info('train acc: %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid acc: %f', valid_acc)

        lines.append(f'{epoch}\t{train_acc}\t{valid_acc}')
        timebudget.report()

    utils.save(model, os.path.join(args.exp_path, 'search.pt'))
    pathlib.Path(os.path.join(args.exp_path, 'search.tsv')).write_text('\n'.join(lines))
    pathlib.Path(os.path.join(args.exp_path, 'genotype.txt')).write_text(str(genotype))
"""init_params = { 'convs': 2, 'channels': [3, 5], 'weight_init': 0.5, 'fcs': 3, 'lr': 0.006, 'bias_init': 0.1, 'filters': [3, 11], 'optimizer': tf.train.AdagradOptimizer, 'mo': 0.5, 'fc_dim': [128, 256, 64] }""" """init_params = { 'convs': 2, 'channels': [32, 64], 'weight_init': 0.1, 'fcs': 1, 'lr': 0.0001, 'bias_init': 0.1, 'filters': [5, 5], 'optimizer': tf.train.AdamOptimizer, 'mo': 0.5, 'fc_dim': [512] }""" init_arch = Arch(init_params) s = SANN(init_arch, T = 10) hc = HillClimber(init_arch, 10) s.run() hc.run() print "\n\nFinal SANN Loss: " + str(s.best.loss()) print "Final SANN Accuracy: " + str(s.best.acc()) print "Final SANN: " + str(s.best) print "\n\nFinal HC loss: " + str(hc.best.loss()) print "Final HC Accuracy: " + str(hc.best.acc())
    def GET(self):
        # Corrupt Cache
        # * Importing bsddb fails on many 2.7 instances;
        #   python tries to import the corrupt cache using bsddb, and this fails
        # * DBPageNotFoundError: since python tried to import the corrupt cache
        #   as a bsddb file, and it is not, it will error out
        i = web.input()
        feed = web.websafe(i.feed)
        if feed == 'news':
            try:
                newsCache = shelve.open(newsFile)
            except ImportError:
                os.remove(newsFile)
                newsCache = shelve.open(newsFile)
            try:
                newsFeed = feedcache.Cache(newsCache,timeToLiveSeconds).fetch(newsRss)
            except:
                newsCache.close()
                newsCache = shelve.open(newsFile)
                os.remove(newsFile)
                newsFeed = feedcache.Cache(newsCache,timeToLiveSeconds).fetch(newsRss)
            newsCache.close()
            news = [(x.title, x.link) for x in newsFeed.entries][:maxNEWS]
            return render.news(news)
        elif feed == 'i686':
            try:
                x86Cache = shelve.open(x86File)
            except:
                os.remove(x86File)
                x86Cache = shelve.open(x86File)
            try:
                x86Feed = feedcache.Cache(x86Cache,timeToLiveSeconds).fetch(x86Rss)
            except:
                x86Cache.close()
                os.remove(x86File)
                x86Cache = shelve.open(x86File)
                x86Feed = feedcache.Cache(x86Cache,timeToLiveSeconds).fetch(x86Rss)
            x86Cache.close()
            x86Pkgs = [(x.title, x.category, x.link, x.summary) for x in x86Feed.entries][:maxPKGS]
            x86=Arch()
            x86.add_packages(x86Pkgs)
            return render.packages(x86)
        elif feed == 'x86_64':
            try:
                x64Cache = shelve.open(x64File)
            except ImportError:
                os.remove(x64File)
                x64Cache = shelve.open(x64File)
            try:
                x64Feed = feedcache.Cache(x64Cache,timeToLiveSeconds).fetch(x64Rss)
            except:
                x64Cache.close()
                os.remove(x64File)
                x64Cache = shelve.open(x64File)
                x64Feed = feedcache.Cache(x64Cache,timeToLiveSeconds).fetch(x64Rss)
            x64Cache.close()
            x64Pkgs = [(x.title, x.category, x.link, x.summary) for x in x64Feed.entries][:maxPKGS]
            x64=Arch()
            x64.add_packages(x64Pkgs)
            return render.packages(x64)
def create_arches(ai_settings, screen, arches, ry, rx, xx, tp):
    arch = Arch(ai_settings, screen)
    arch.rect.y = ry
    arch.rect.x = rx
    arch.x = xx
    arches.add(arch)
    if tp == "l":
        arch.image1 = pygame.image.load('assets/arleft1.bmp')
        arch.image2 = pygame.image.load('assets/arleft2.bmp')
        arch.image3 = pygame.image.load('assets/arleft3.bmp')
    if tp == "r":
        arch.image1 = pygame.image.load('assets/armright1.bmp')
        arch.image2 = pygame.image.load('assets/armright2.bmp')
        arch.image3 = pygame.image.load('assets/armright3.bmp')
    if tp == "rd" or tp == "ld":
        arch.image1 = pygame.image.load('assets/arend1.bmp')
        arch.image2 = pygame.image.load('assets/arend2.bmp')
        arch.image3 = pygame.image.load('assets/arend3.bmp')
                        help='architecture definition (XML)')
arg_parser.add_argument('--list-segments', action='store_true')
arg_parser.add_argument('image_definition',
                        type=argparse.FileType('r'),
                        nargs=1,
                        help='image definition (XML)')
arg_parser.add_argument('image_binary',
                        type=argparse.FileType('wb'),
                        nargs=1,
                        help='image binary output')

args = arg_parser.parse_args()

arch_tree = xml.etree.ElementTree.parse(args.arch)
args.arch.close()
arch = Arch(arch_tree)

image_tree = xml.etree.ElementTree.parse(args.image_definition[0])
args.image_definition[0].close()
image = Image(arch, image_tree)

print("assigning coordinates of objects")
image.assign_coordinates()

# XXX need a pass after assigning coordinates to parse contents of data
# segments, so that intersegment references can be resolved

print("computing sizes of segments")
image.compute_segment_sizes()

print("allocating physical memory to objects")
#2 Load data
# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Download and load the training data
trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Download and load the test data
testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)

#3 Define different architectures
arch = Arch()

#4 Define hyperparameters
epochs = 2
show_every = 128
criterion = nn.NLLLoss()
flatten = lambda x: x.view(x.shape[0], -1)

#5 Train network
for architecture in arch.next_arch():
    network = Network(architecture)
    print(network.get_description())
    for lr in arch.learning_rates:
        network.set_lr(lr)
        optimizer = optim.Adam(network.parameters(), lr=lr)
        trainer = Trainer(network, criterion, optimizer)\
def main():
    np.random.seed(args.seed)
    cudnn.benchmark = True
    cudnn.enabled = True
    torch.manual_seed(args.seed)

    # ================================================
    # total, used = os.popen(
    #     'nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader'
    # ).read().split('\n')[args.gpu].split(',')
    # total = int(total)
    # used = int(used)
    #
    # print('Total GPU mem:', total, 'used:', used)
    # try:
    #     block_mem = 0.85 * (total - used)
    #     print(block_mem)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     del x
    # except RuntimeError as err:
    #     print(err)
    #     block_mem = 0.8 * (total - used)
    #     print(block_mem)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     del x
    #
    # print('reuse mem now ...')
    # ================================================

    # when True, the optimization step is on alpha and w;
    # if False, optimization is only on w (ordinary backprop, after pruning)
    args.unrolled = True

    logging.info('GPU device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss().to(device)
    model = Network(args.init_ch, 10, args.layers, criterion)  # .to(device)

    logging.info("Total param size = %f MB", utils.count_parameters_in_MB(model))

    # this is the optimizer to optimize
    optimizer = optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.wd)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)  # 50000
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))  # 25000

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True,
        num_workers=2)

    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs), eta_min=args.lr_min)

    # create similar queues for the snip function
    snip_train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.snipbatchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)  # for running on a PC this should be one

    snip_valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.snipbatchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True,
        num_workers=2)  # for running on a PC this should be one
    # we don't need the validation set queue but I don't want to break anything
    # TODO remove it later

    snip_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs), eta_min=args.lr_min)
    # I don't know if we need a scheduler like this; maybe set all the fancy settings to zero?

    arch = Arch(model, args)

    # TODO: how to call minibatches on snip? do we need one minibatch or more? if so, how many?
    # note: enumerate yields (index, batch), so the batch tuple must be unpacked separately
    for step, (inputs_snip_batch, labels_snip_batch) in enumerate(train_queue):
        inputs_snip_batch, labels_snip_batch = inputs_snip_batch.to(device), labels_snip_batch.cuda(non_blocking=True)
        model = prune.snip(model, inputs_snip_batch, labels_snip_batch)

    # TODO learning: what is epoch?
    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('\nEpoch: %d lr: %e', epoch, lr)

        genotype = model.genotype()
        logging.info('Genotype: %s', genotype)

        # print(F.softmax(model.alphas_normal, dim=-1))
        # print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, arch, criterion, optimizer, lr)
        logging.info('train acc: %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid acc: %f', valid_acc)

        utils.save(model, os.path.join(args.exp_path, 'search.pt'))
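# (Added for illustration, not part of the original repo.) prune.snip is not shown
# above; a minimal sketch of SNIP-style connection-sensitivity pruning, assuming the
# goal is to rank weights by |gradient * weight| on one minibatch and zero out the
# lowest-scoring fraction. Names (snip_sketch, keep_ratio) are hypothetical; torch is
# assumed to be imported as in the rest of this file.
def snip_sketch(model, criterion, inputs, targets, keep_ratio=0.5):
    model.zero_grad()
    loss = criterion(model(inputs), targets)
    loss.backward()

    # sensitivity score per weight: |g * w| from a single minibatch
    scores, params = [], []
    for p in model.parameters():
        if p.grad is not None:
            scores.append((p.grad * p).abs().flatten())
            params.append(p)
    all_scores = torch.cat(scores)

    # keep the top keep_ratio fraction, zero out the rest
    k = max(1, int(keep_ratio * all_scores.numel()))
    threshold = torch.topk(all_scores, k, largest=True).values.min()
    with torch.no_grad():
        for p in params:
            p.mul_(((p.grad * p).abs() >= threshold).float())
    return model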
def main():
    # initialize architecture class
    a = Arch()
    a.develop_memory()
    a.develop_registers()

    # initialize all stages
    ifid = IF_ID()

    count = 0
    incremented_pc = 0x7A000

    with open('ins', 'r') as f:
        for line in f:
            count = count + 1
            incremented_pc += 4
            sep = list(line)   # separate each hex digit into list element
            s = int(line, 16)  # convert str to short int

            if int(sep[2], 16) == 0:  # opcode check for r-format
                r = RFormat()  # initialize new r-format object
                r.ins = s      # set instruction
                r.opcode = (s & r.R_MASKS["opcode"]) >> (32 - 6)
                r.src1 = (s & r.R_MASKS["s1"]) >> 21
                r.src2 = (s & r.R_MASKS["s2"]) >> 16
                r.dest = (s & r.R_MASKS["dest"]) >> 11
                r.shamt = (s & r.R_MASKS["shamt"]) >> 6
                func = (s & r.R_MASKS["func"])  # preliminary func

                # find correct matching function
                for option in r.func_options.keys():
                    if func == option:
                        # set function in instruction object
                        r.func = r.func_options[func]

                # add to Architecture dictionary
                a.hash[count] = dict(
                    pc=hex(incremented_pc),
                    addr=r.format(),
                    ins=r.as_hex(),
                    opcode=r.opcode,
                    src1=r.src1,
                    src2=r.src2,
                    dest=r.dest,
                    shamt=r.shamt,
                    func=r.func,
                    raw_func=hex(func)[2:],
                )
                a.example.append(r.full_ins())
            else:
                i = IFormat()  # initialize new i-format object
                i.ins = s      # set instruction
                i.opcode = (s & i.I_MASKS["opcode"]) >> (32 - 6)
                i.src1 = (s & i.I_MASKS["s1"]) >> 21
                i.dest_src = (s & i.I_MASKS["ds"]) >> 16
                i.offset = (s & i.I_MASKS["off"])

                # find correct matching operation
                for op in i.ops.keys():
                    if i.opcode == op:
                        # set string equivalent instruction (e.g. "lw", "sw")
                        i.op = i.ops[i.opcode]
                        if (i.op == i.ops[0x20]) or (i.op == i.ops[0x28]) or \
                                (i.op == i.ops[0x23]) or (i.op == i.ops[0x2b]):
                            i.offset = i.signed_offset()

                # add to Architecture dictionary
                a.hash[count] = dict(
                    pc=hex(incremented_pc),
                    addr=i.format(),
                    ins=i.as_hex(i.ins),
                    opcode=i.opcode,
                    src1=i.src1,
                    dest_src=i.dest_src,
                    offset=i.offset,
                    func=i.op,
                )
                a.example.append(i.full_ins())

    a.pipeline()
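# (Added for illustration.) A worked example of the field extraction above, assuming
# the standard MIPS R-format layout (6-bit opcode, 5-bit rs/rt/rd, 5-bit shamt,
# 6-bit funct). For the word 0x012A4020, i.e. `add $t0, $t1, $t2`:
#   opcode = (0x012A4020 >> 26) & 0x3F = 0x00  (r-format)
#   src1   = (0x012A4020 >> 21) & 0x1F = 9     ($t1)
#   src2   = (0x012A4020 >> 16) & 0x1F = 10    ($t2)
#   dest   = (0x012A4020 >> 11) & 0x1F = 8     ($t0)
#   funct  =  0x012A4020        & 0x3F = 0x20  (add)
# The mask-then-shift expressions in the loop compute the same fields.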
        if eprime < e:
            return 1
        else:
            return 0

    def get_nearest_neighbors(self):
        return [1]

    def iterate(self):
        curr_loss = self.current_arch.loss()
        neighbors = self.get_nearest_neighbors()
        arch_prime = self.choose_neighbor(neighbors)
        loss_prime = arch_prime.loss()
        prob = self.prob_function(curr_loss, loss_prime, 10)
        if prob > random.random():
            self.current_arch = arch_prime


if __name__ == '__main__':
    init_arch = Arch.make_arch([.003, .3, .05, .05, 2, 2, [5, 3], [11, 5],
                                [512, 256], tf.train.MomentumOptimizer])
    n = 1000
    s = GridSearch(init_arch, n)
    for i in range(n):
        s.iterate()
    print "final: " + str(s.current_arch.loss())
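# (Added for illustration.) The prob_function fragment above is the greedy case:
# accept only when the new loss is lower. The temperature argument passed in
# iterate() suggests a simulated-annealing variant; this is a sketch of the usual
# Metropolis acceptance rule under that assumption, not the repo's implementation.
import math

def metropolis_prob(e, eprime, T):
    # always accept an improvement; otherwise accept with probability exp(-(e'-e)/T)
    if eprime < e:
        return 1.0
    return math.exp(-(eprime - e) / T)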
def main():
    # ================================================
    # total, used = os.popen(
    #     'nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader'
    # ).read().split('\n')[args.gpu].split(',')
    # total = int(total)
    # used = int(used)
    #
    # print('Total GPU mem:', total, 'used:', used)
    #
    # try:
    #     block_mem = 0.91 * (total - used)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    # except RuntimeError as err:
    #     print(err)
    #     try:
    #         block_mem = 0.85 * (total - used)
    #         x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     except RuntimeError as err:
    #         print(err)
    #         block_mem = 0.5 * (total - used)
    #         x = torch.empty((256, 1024, int(block_mem))).cuda()
    #
    # print('allocated mem:', x.numel() / 256 / 1024)
    # del x
    # print('reuse mem now ...')
    # ================================================

    args.unrolled = True

    np.random.seed(args.seed)
    # cudnn.benchmark = True
    # cudnn.enabled = True
    torch.manual_seed(args.seed)

    logging.info('GPU device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss().to(device)
    model = Network(args.init_ch, 10, args.layers, criterion).to(device)

    logging.info("Total param size = %f MB", utils.count_parameters_in_MB(model))

    # this is the optimizer to optimize
    optimizer = optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.wd)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)  # 50000
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))  # 25000

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=False)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=False)

    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs), eta_min=args.lr_min)

    arch = Arch(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('\nEpoch: %d lr: %e', epoch, lr)

        genotype = model.genotype()
        logging.info('Genotype: %s', genotype)

        # print(F.softmax(model.alphas_normal, dim=-1))
        # print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, arch, criterion, optimizer, lr)
        logging.info('train acc: %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid acc: %f', valid_acc)

        utils.save(model, os.path.join(args.exp_path, 'weights.pt'))
    def GET(self):
        # Corrupt Cache
        # * Importing bsddb fails on many 2.7 instances;
        #   python tries to import the corrupt cache using bsddb, and this fails
        # * DBPageNotFoundError: since python tried to import the corrupt cache
        #   as a bsddb file, and it is not, it will error out
        i = web.input()
        feed = web.websafe(i.feed)
        if feed == 'news':
            try:
                newsCache = shelve.open(newsFile)
            except ImportError:
                os.remove(newsFile)
                newsCache = shelve.open(newsFile)
            try:
                newsFeed = feedcache.Cache(newsCache, timeToLiveSeconds).fetch(newsRss)
            except:
                newsCache.close()
                newsCache = shelve.open(newsFile)
                os.remove(newsFile)
                newsFeed = feedcache.Cache(newsCache, timeToLiveSeconds).fetch(newsRss)
            newsCache.close()
            news = [(x.title, x.link) for x in newsFeed.entries][:maxNEWS]
            return render.news(news)
        elif feed == 'i686':
            try:
                x86Cache = shelve.open(x86File)
            except:
                os.remove(x86File)
                x86Cache = shelve.open(x86File)
            try:
                x86Feed = feedcache.Cache(x86Cache, timeToLiveSeconds).fetch(x86Rss)
            except:
                x86Cache.close()
                os.remove(x86File)
                x86Cache = shelve.open(x86File)
                x86Feed = feedcache.Cache(x86Cache, timeToLiveSeconds).fetch(x86Rss)
            x86Cache.close()
            x86Pkgs = [(x.title, x.category, x.link, x.summary) for x in x86Feed.entries][:maxPKGS]
            x86 = Arch()
            x86.add_packages(x86Pkgs)
            return render.packages(x86)
        elif feed == 'x86_64':
            try:
                x64Cache = shelve.open(x64File)
            except ImportError:
                os.remove(x64File)
                x64Cache = shelve.open(x64File)
            try:
                x64Feed = feedcache.Cache(x64Cache, timeToLiveSeconds).fetch(x64Rss)
            except:
                x64Cache.close()
                os.remove(x64File)
                x64Cache = shelve.open(x64File)
                x64Feed = feedcache.Cache(x64Cache, timeToLiveSeconds).fetch(x64Rss)
            x64Cache.close()
            x64Pkgs = [(x.title, x.category, x.link, x.summary) for x in x64Feed.entries][:maxPKGS]
            x64 = Arch()
            x64.add_packages(x64Pkgs)
            return render.packages(x64)
def main():
    np.random.seed(args.seed)

    # benchmark mode causes cuDNN to evaluate algorithms for the current machine and pick the best
    cudnn.benchmark = True
    cudnn.enabled = True
    torch.manual_seed(args.seed)

    # ================================================
    total, used = os.popen(
        'nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader'
    ).read().split('\n')[args.gpu].split(',')
    total = int(total)
    used = int(used)
    print('Total GPU mem:', total, 'used:', used)
    # try:
    #     block_mem = 0.85 * (total - used)
    #     print(block_mem)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     del x
    # except RuntimeError as err:
    #     print(err)
    #     block_mem = 0.8 * (total - used)
    #     print(block_mem)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     del x
    #
    # print('reuse mem now ...')
    # ================================================

    if not args.unrolled:
        print('WARNING: unrolled arg is NOT true. This is useful only for an ablation study of the bilevel optimization!')

    logging.info('GPU device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss().to(device)

    # CIFAR classification task:
    # 16 initial channels, num_classes=10, 8 cells (layers)
    model = Network(args.init_ch, 10, args.layers, criterion).to(device)

    logging.info("Total param size = %f MB", utils.count_parameters_in_MB(model))

    # this is the optimizer to optimize
    optimizer = optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.wd)

    # note that we get only the train set here and split it in half to get a validation set;
    # CIFAR-10 has 60K images in 10 classes, 50K in train and 10K in test,
    # so ultimately we have 25K train, 25K val, 10K test
    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)  # 50000
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))  # 25000

    # generate random batches of 64 on the train/val subsets
    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True,
        num_workers=2)

    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs), eta_min=args.lr_min)

    # arch is a sort of meta-model that updates the theta and alpha parameters
    arch = Arch(model, args)

    # in this phase we only run 50 epochs
    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('\nEpoch: %d lr: %e', epoch, lr)

        # genotype extracts the highest-weighted two primitives per node;
        # this is for information dump only
        genotype = model.genotype()
        logging.info('Genotype: %s', genotype)

        # print(F.softmax(model.alphas_normal, dim=-1))
        # print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, arch, criterion, optimizer, lr)
        logging.info('train acc: %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid acc: %f', valid_acc)

        utils.save(model, os.path.join(args.exp_path, 'search.pt'))
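# (Added for illustration.) The Arch class referenced above ("a sort of meta-model
# that updates the theta and alpha parameters") is not shown here; a minimal sketch
# of the first-order variant of the architecture step it performs each iteration,
# assuming the model exposes its architecture parameters through a separate optimizer
# (all names here are hypothetical). The unrolled (second-order) step additionally
# differentiates through one virtual weight update, which this sketch omits.
def arch_step_sketch(model, criterion, alpha_optimizer, x_valid, y_valid):
    alpha_optimizer.zero_grad()
    loss = criterion(model(x_valid), y_valid)  # validation loss drives the alphas
    loss.backward()
    alpha_optimizer.step()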
#!/usr/bin/python
import sys
import subprocess
import random

from arch import Arch
from instruction import eprint

arch = Arch(sys.argv)
if arch.fill() is None:
    eprint('Failed to fill ISA parsing tree')
    sys.exit(1)

total_fails = 0
for instr in arch.instructions:
    ins_str = instr.get_string()
    # replace "don't care" bits (x) with random 0/1 values
    ins_str = map(lambda c: c if c != 'x' else str(random.randint(0, 1)), ins_str)
    int_val = int(reduce(lambda s, res: s + res, ins_str), base=2)
    # print(int(ins_str, base=2))
    proc = subprocess.Popen(['./a.out', str(int_val)], stdout=subprocess.PIPE)
    (out, err) = proc.communicate()
    out = out.rstrip('\n')
    res = instr.mnem == out or (instr.parent != '' and instr.parent == out)
    total_fails += not res
    print('Checking ' + instr.mnem + ': ' + out + '\t[' + ('OK' if res else 'FAIL') + ']')

print
if total_fails:
    print('Testing failed')
def main():
    np.random.seed(args.seed)
    cudnn.benchmark = True
    cudnn.enabled = True
    torch.manual_seed(args.seed)

    # ================================================
    for id in device_ids:
        total, used = os.popen(
            'nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader'
        ).read().split('\n')[id].split(',')
        print('GPU ({}) mem:'.format(id), total, 'used:', used)
    # try:
    #     block_mem = 0.85 * (total - used)
    #     print(block_mem)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     del x
    # except RuntimeError as err:
    #     print(err)
    #     block_mem = 0.8 * (total - used)
    #     print(block_mem)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     del x
    #
    # print('reuse mem now ...')
    # ================================================

    args.unrolled = True

    logging.info('GPU device = %s' % args.gpu)
    logging.info("args = %s", args)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)

    # Load dataset
    if args.dataset == 'cifar10':
        if args.gold_fraction == 0:
            train_data = CIFAR10(
                root=args.data, train=True, gold=False, gold_fraction=args.gold_fraction,
                corruption_prob=args.corruption_prob, corruption_type=args.corruption_type,
                transform=train_transform, download=True, seed=args.seed)
            if args.clean_valid:
                gold_train_data = CIFAR10(
                    root=args.data, train=True, gold=True, gold_fraction=1.0,
                    corruption_prob=args.corruption_prob, corruption_type=args.corruption_type,
                    transform=train_transform, download=True, seed=args.seed)
        else:
            train_data = CIFAR10(
                root=args.data, train=True, gold=True, gold_fraction=args.gold_fraction,
                corruption_prob=args.corruption_prob, corruption_type=args.corruption_type,
                transform=train_transform, download=True, seed=args.seed)
        num_classes = 10

    elif args.dataset == 'cifar100':
        if args.gold_fraction == 0:
            train_data = CIFAR100(
                root=args.data, train=True, gold=False, gold_fraction=args.gold_fraction,
                corruption_prob=args.corruption_prob, corruption_type=args.corruption_type,
                transform=train_transform, download=True, seed=args.seed)
            if args.clean_valid:
                gold_train_data = CIFAR100(
                    root=args.data, train=True, gold=True, gold_fraction=1.0,
                    corruption_prob=args.corruption_prob, corruption_type=args.corruption_type,
                    transform=train_transform, download=True, seed=args.seed)
        else:
            train_data = CIFAR100(
                root=args.data, train=True, gold=True, gold_fraction=args.gold_fraction,
                corruption_prob=args.corruption_prob, corruption_type=args.corruption_type,
                transform=train_transform, download=True, seed=args.seed)
        num_classes = 100

    # Split data into train and validation
    num_train = len(train_data)  # 50000
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))  # 45000

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    if args.clean_valid:
        valid_queue = torch.utils.data.DataLoader(
            gold_train_data,
            batch_size=batchsz,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
            pin_memory=True,
            num_workers=2)
    else:
        valid_queue = torch.utils.data.DataLoader(
            train_data,
            batch_size=batchsz,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
            pin_memory=True,
            num_workers=2)

    if args.loss_func == 'cce':
        criterion = nn.CrossEntropyLoss().cuda()
    elif args.loss_func == 'rll':
        criterion = utils.RobustLogLoss().cuda()
    else:
        # braces doubled so str.format does not treat the set literal as a placeholder
        assert False, "Invalid loss function '{}' given. Must be in {{'cce', 'rll'}}".format(args.loss_func)

    model = Network(args.init_ch, num_classes, args.layers, criterion)
    if len(device_ids) > 1:
        model = MyDataParallel(model).cuda()
    else:
        model.cuda()
    # model = para_model.module.cuda()

    logging.info("Total param size = %f MB", utils.count_parameters_in_MB(model))

    # this is the optimizer to optimize
    optimizer = optim.SGD(model.parameters(), args.lr, momentum=args.momentum,
                          weight_decay=args.wd, nesterov=True)

    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs), eta_min=args.lr_min)

    arch = Arch(model, args)

    global start
    start = time.time()
    for epoch in range(args.epochs):
        current_time = time.time()
        if current_time - start >= args.time_limit:
            break
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('\nEpoch: %d lr: %e', epoch, lr)

        genotype = model.genotype()
        logging.info('Genotype: %s', genotype)

        # print(F.softmax(model.alphas_normal, dim=-1))
        # print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, arch, criterion, optimizer, lr)
        logging.info('train acc: %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid acc: %f', valid_acc)

        utils.save(model, os.path.join(args.exp_path, 'search_epoch1.pt'))