def merge_button_press(*args):
    """Dash callback fired when one of the per-row merge buttons is pressed.

    args layout (fixed by the callback wiring):
        args[:-2] -- n_clicks of every merge button (entries may be None)
        args[-2]  -- serialized search-results state (holds "data")
        args[-1]  -- serialized previous merge state (holds "clicks")

    Returns a two-element list: the rendered merge modal and the saved
    new merge state.
    """
    results = load_state(args[-2])
    data = results.get("data")
    previous_clicks = load_state(args[-1]).get("clicks")

    # Dash reports None for buttons that were never clicked; normalize to 0.
    clicks = [0 if click is None else click for click in args[:-2]]

    if not previous_clicks:
        previous_clicks = [0] * len(clicks)

    # The clicked button is the first one whose count increased.
    button_clicked = None
    for i, (click, previous_click) in enumerate(zip(clicks, previous_clicks)):
        # print(f"{click} vs {previous_click}")
        if click > previous_click:
            button_clicked = i
            break

    # BUGFIX: guard against no button having been clicked (e.g. the callback
    # firing on initial load) -- the original indexed
    # data["bucket id"][None] and crashed.
    bucket_id = (data["bucket id"][button_clicked]
                 if button_clicked is not None else None)

    state = {
        "clicks": clicks,
        "button_clicked": button_clicked,
        "bucket_id": bucket_id,
    }
    return [merge.render_modal(button_clicked, data), save_state(state)]
def __init__(self, num_classes, pretrained=True, pool_first=True, **kwargs):
    """Build an R3D-18 video backbone, optionally loading local pretrained weights."""
    super(RESNET18, self).__init__()
    self.resnet = torchvision.models.video.r3d_18(pretrained=False,
                                                  progress=False,
                                                  num_classes=num_classes,
                                                  **kwargs)

    #############
    # Initialization: always start from Xavier, then optionally overwrite
    # with the bundled checkpoint.
    initializer.xavier(net=self)

    if not pretrained:
        logging.info("Network:: graph initialized, use random inilization!")
        return

    checkpoint_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'pretrained/r3d_18-b3b3357e.pth')
    logging.info(
        "Network:: graph initialized, loading pretrained model: `{}'".
        format(checkpoint_path))
    assert os.path.exists(checkpoint_path), "cannot locate: `{}'".format(
        checkpoint_path)
    checkpoint = torch.load(checkpoint_path)
    # load_state(self.resnet, checkpoint['state_dict'])
    load_state(self.resnet, checkpoint)
def load_state(self, path, Iter, resume=False):
    """Restore the model (and optimizer when resuming) from ckpt_iter_<Iter>.pth.tar."""
    checkpoint = os.path.join(path, "ckpt_iter_{}.pth.tar".format(Iter))
    if resume:
        utils.load_state(checkpoint, self.model, self.optim)
    else:
        utils.load_state(checkpoint, self.model)
def update_modal(result_button_clicks, search_state):
    """Render the edit modal for the most recently clicked result row."""
    print("Updating Modal")
    result_state = load_state(result_button_clicks)
    df = load_state(search_state).get("data")
    clicked = result_state.get("button_clicked")
    return modal.render_modal(clicked, df)
def toggle_merge_modal(n_clicks, merge_state, database_update):
    """Decide whether the merge modal should be open."""
    button_clicked = load_state(merge_state).get("button_clicked")
    # A completed database update always closes the modal.
    if load_state(database_update).get("status"):
        return False
    # Open only while a merge button has fired and Close hasn't been pressed.
    return n_clicks is None and button_clicked is not None
def main():
    """Build the model from config, load its checkpoint, and run clean +
    adversarial (PGD) evaluation on the test set.

    Reads the global ``args`` for the config path and populates the global
    ``cfg``.  Exits with status 1 when no GPU is available.
    """
    global cfg
    cfg = Config.fromfile(args.config)
    cfg.save = '{}/{}-{}-{}'.format(cfg.save_path, cfg.model, cfg.dataset,
                                    time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(cfg.save)
    logger = utils.create_logger('global_logger', cfg.save + '/log.txt')

    if not torch.cuda.is_available():
        logger.info('no gpu device available')
        sys.exit(1)

    # Set cuda device & seed
    torch.cuda.set_device(cfg.gpu)
    np.random.seed(cfg.seed)
    cudnn.benchmark = True
    torch.manual_seed(cfg.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(cfg.seed)

    # Model
    print('==> Building model..')
    # SECURITY/IDIOM FIX: look the architecture code up by attribute name
    # instead of eval() on a config-supplied string.
    arch_code = getattr(architecture_code, cfg.model)
    net = models.model_entry(cfg, arch_code)
    net = net.cuda()
    cfg.netpara = sum(p.numel() for p in net.parameters()) / 1e6
    logger.info('config: {}'.format(pprint.pformat(cfg)))

    # Load checkpoint.
    if not Debug:
        print('==> Resuming from checkpoint..')
        utils.load_state(cfg.resume_path, net)

    # Data
    print('==> Preparing data..')
    testloader = dataset_entry(cfg)
    criterion = nn.CrossEntropyLoss()
    net_adv = AttackPGD(net, cfg.attack_param)

    print('==> Testing on Clean Data..')
    test(net, testloader, criterion)
    print('==> Testing on Adversarial Data..')
    test(net_adv, testloader, criterion, adv=True)
def load(path):
    """Reconstruct an ActWrapper from a cloudpickled (model_data, act_params) file.

    Unpacks the embedded zip of TF checkpoint files into a temporary
    directory, restores the session variables from it, and returns the
    wrapped act function.  Side effect: enters a new tf.Session.
    """
    with open(path, "rb") as f:
        model_data, act_params = cloudpickle.load(f)
    act = deepq.build_act(**act_params)
    sess = tf.Session()
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)
        # RESOURCE FIX: close the zip handle deterministically (the original
        # left it to the garbage collector).
        with zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED) as archive:
            archive.extractall(td)
        load_state(os.path.join(td, "model"))
    return ActWrapper(act, act_params)
def update_raw_string_bucket(update_clicks, modal_validity, bucket_id,
                             bucket_name, search_results, result_clicks):
    """Persist a bucket assignment for the currently selected raw string.

    Validates the modal state first, then updates or inserts a row in
    reference.organization_buckets_edits.  Returns the saved
    {'updated': bool} state.
    """
    modal_validity = load_state(modal_validity)
    validity = modal_validity.get('validity')
    data = load_state(search_results).get('data')
    button_clicked = load_state(result_clicks).get("button_clicked")

    # No validity decision yet, or an explicitly invalid entry: do nothing.
    if validity is None:
        return save_state({'updated': False})
    if validity == INVALID:
        return save_state({'updated': False})

    bucket_id = bucket_id if bucket_id is not None else ""
    bucket_name = bucket_name if bucket_name is not None else ""
    valid_bucket_id = modal_validity.get("bucket_id")
    valid_bucket_name = modal_validity.get("bucket_name")

    # It's okay if valid bucket name is none. Means it's a new bucket.
    if validity == NEW:
        if valid_bucket_id != bucket_id:
            return save_state({'updated': False})
    if validity == EXISTS:
        # BUGFIX: the original compared valid_bucket_name to itself
        # (always equal), so a stale bucket name was never rejected.
        if valid_bucket_id != bucket_id or valid_bucket_name != bucket_name:
            return save_state({'updated': False})

    # BUGFIX: "\'" is just "'", so quotes were never escaped; SQL escapes a
    # single quote inside a string literal by doubling it.
    # NOTE(review): statements are still built with f-string interpolation --
    # injection risk; prefer parameterized queries if sql.execute supports them.
    raw_string = data["raw string"][button_clicked].replace("'", "''")
    if sql.execute(
            f"SELECT count(*) FROM reference.organization_buckets_edits WHERE raw_string = '{raw_string}';"
    )['count'][0] > 0:
        sql.execute(
            f"UPDATE reference.organization_buckets_edits SET bucket = '{bucket_name}' WHERE raw_string = '{raw_string}';"
        )
        sql.execute(
            f"UPDATE reference.organization_buckets_edits SET bucket_id = '{bucket_id}' WHERE raw_string = '{raw_string}';"
        )
    else:
        sql.execute(
            f"INSERT INTO reference.organization_buckets_edits (raw_string, bucket, bucket_id, time) VALUES ('{raw_string}', '{bucket_name}', '{bucket_id}', GETDATE());"
        )
    print(f"Database updated: {raw_string} to {bucket_id} ({bucket_name})")
    return save_state({'updated': True})
def view_calibration(id):
    """View control page for a camera.

    Args:
        id (str): Identifier name of camera

    Returns:
        HTML page
    """
    # TODO: if no session['UPLOAD_TOKEN'], redirect to page that asks one.
    if not is_existing_id(id):
        return "", 404

    small_url = url_for("image_view", id=id, image="small")
    crop_url = url_for("image_view", id=id, image="crop")
    state = load_state(id)

    if request.method == "POST":
        # Apply submitted server-side settings before rendering.
        server_settings = state["server"]
        server_settings["mode"] = request.form["mode"]
        server_settings["exposure_modifier"] = round(
            get_float(request.form["exposure_modifier"], 1.0), 2)
        refresh_state(id, server_settings, "server")

    return render_template(
        "view.html",
        id=id,
        small_url=small_url,
        crop_url=crop_url,
        state=state,
        state_view=json.dumps(state, indent=4, sort_keys=True),
    )
def update_update_button_clicks(*args):
    """Record per-row update-button click counts and which button just fired.

    args[:-1] are the buttons' n_clicks (entries may be None); args[-1] is
    the serialized previous state.  Returns the saved new state.
    """
    raw_clicks = args[:-1]
    previous_state = load_state(args[-1])

    button_clicked = -1
    if not args[-1]:
        # No previous state: the first button with any clicks is the one.
        for index, count in enumerate(raw_clicks):
            if count:
                button_clicked = index
                break
    else:
        # With previous state, find the counter that increased
        # (the last increase wins).
        previous_clicks = previous_state.get("clicks")
        for index, (count, old_count) in enumerate(zip(raw_clicks,
                                                       previous_clicks)):
            if count and count > old_count:
                button_clicked = index

    # Store zeros instead of None so later comparisons stay numeric.
    normalized = [count if count else 0 for count in raw_clicks]

    # Save state:
    return save_state({"clicks": normalized, "button_clicked": button_clicked})
def update_state_search(n_clicks, raw_string_n_submits, bucket_name_n_submits,
                        bucket_id_n_submits, database_updated, previous_state,
                        raw_string, bucket_name, bucket_id):
    """Run the bucket search (when triggered) and emit state + results table.

    Returns a 2-tuple: (saved search state, rendered results table).
    """
    # print(f"Called Update State of Search Bar {raw_string}")
    previous_state = load_state(previous_state)
    # Determine Whether to Run Search:
    # We require one of n_clicks/n_submits to be not None
    # The callback is called once before anything is populated.
    # In this case, we just ignore it. Otherwise, we want to run search.
    run_search = True if n_clicks else \
        True if raw_string_n_submits else \
        True if bucket_name_n_submits else \
        True if bucket_id_n_submits else False
    # If we run the search, do it!
    if run_search:
        # Empty strings mean "no filter" for that field.
        raw_string = None if raw_string == '' else raw_string
        bucket_name = None if bucket_name == '' else bucket_name
        bucket_id = None if bucket_id == '' else bucket_id
        if raw_string or bucket_name or bucket_id:
            # NOTE(review): filters are interpolated into the SQL via
            # f-strings -- injection risk; prefer parameterized queries.
            data = sql.execute(f"""
                SELECT raw_string as "Raw String"
                    , coalesce(edits.bucket, original.bucket) as "Bucket Name"
                    , coalesce(edits.bucket_id, original.bucket_id) as "Bucket ID"
                    , original.bucket_id as "Original Bucket ID"
                    , original.has_new_bucket as "New Bucket"
                FROM staging.organization_buckets original
                LEFT JOIN reference.organization_buckets_edits edits USING(raw_string)
                WHERE original.raw_string <> ''
                    AND original.bucket <> ''
                    { f" AND (original.raw_string ~* '{raw_string}' ) " if raw_string else '' }{ f" AND (original.bucket ~* '{bucket_name}' ) " if bucket_name else '' }
                    { f" AND (original.bucket_id ~* '{bucket_id}' ) " if bucket_id else '' }
                ORDER BY CASE WHEN original.has_new_bucket THEN 1 ELSE 0 END DESC, 3,2,1
                LIMIT {results.N_ROWS}
            """)
        else:
            data = None
    else:
        data = None
    # Write out the current search state.
    new_state = {
        "raw_string": raw_string,
        "bucket_name": bucket_name,
        "bucket_id": bucket_id,
        "run_search": run_search,
        "data": data
    }
    return (save_state(new_state),
            results.generate_results_table(new_state.get("data")))
def load_state(self, root, Iter, resume=False):
    """Restore model and discriminator checkpoints saved at iteration Iter
    (optimizers too when resuming)."""
    model_ckpt = os.path.join(root, "ckpt_iter_{}.pth.tar".format(Iter))
    disc_ckpt = os.path.join(root, "D_iter_{}.pth.tar".format(Iter))
    if resume:
        utils.load_state(model_ckpt, self.model, self.optim)
        utils.load_state(disc_ckpt, self.netD, self.optimD)
    else:
        utils.load_state(model_ckpt, self.model)
        utils.load_state(disc_ckpt, self.netD)
def toggle_modal(result_clicks, close_n_clicks, database_update,
                 previous_is_open, button_clicks):
    """Flip the modal's open state when a row button or Close was pressed."""
    clicked_index = load_state(button_clicks).get("button_clicked", -1)
    print(f"Toggling Modal to {not previous_is_open}")
    should_toggle = close_n_clicks is not None or clicked_index >= 0
    return (not previous_is_open) if should_toggle else previous_is_open
def load_state(self, path, Iter, resume=False):
    """Load model and discriminator weights for iteration Iter; include the
    optimizers when resuming training."""
    def _ckpt(template):
        # Checkpoint file name for this iteration.
        return os.path.join(path, template.format(Iter))

    model_path = _ckpt("ckpt_iter_{}.pth.tar")
    discriminator_path = _ckpt("D_iter_{}.pth.tar")
    if not resume:
        utils.load_state(model_path, self.model)
        utils.load_state(discriminator_path, self.netD)
    else:
        utils.load_state(model_path, self.model, self.optim)
        utils.load_state(discriminator_path, self.netD, self.optimD)
def ajax_get_state(id):
    """Get the current reported state from a camera as a formatted string.

    Args:
        id (str): Identifier name of camera

    Returns:
        string
    """
    if not is_existing_id(id):
        return "", 404
    return json.dumps(load_state(id), indent=4, sort_keys=True), 200
def train(self):
    """Run the agent/dynamics interaction loop until num_timesteps,
    restoring and saving checkpoints along the way."""
    self.agent.start_interaction(self.envs, nlump=self.hps['nlumps'],
                                 dynamics=self.dynamics)
    while True:
        info = self.agent.step()
        if info['update']:
            logger.logkvs(info['update'])
            logger.dumpkvs()
        # On the very first step, try to restore an existing checkpoint.
        if self.agent.rollout.stats['tcount'] == 0:
            fname = os.path.join(self.hps['save_dir'], 'checkpoints')
            if os.path.exists(fname+'.index'):
                load_state(fname)
                print('load successfully')
            else:
                print('fail to load')
        # NOTE(review): num_timesteps/num_timesteps is always 1, so this
        # modulus is always 0 and a checkpoint is saved on every iteration.
        # The divisor was probably meant to be a save frequency -- confirm.
        if self.agent.rollout.stats['tcount']%int(self.num_timesteps/self.num_timesteps)==0:
            fname = os.path.join(self.hps['save_dir'], 'checkpoints')
            save_state(fname)
        if self.agent.rollout.stats['tcount'] > self.num_timesteps:
            break
        # print(self.agent.rollout.stats['tcount'])
    self.agent.stop_interaction()
def test(**kwargs):
    """Evaluate the OCR model on the test split and print exact-match accuracy.

    kwargs override entries in the global ``opt`` config.  Assumes
    batch_size effectively yields one sample per batch (only outputs[0] and
    text[0] are used) -- TODO confirm.
    """
    opt.parse(kwargs)
    dataset = OCRDataset('data/images', 'data/labels', 'data/test.imglist',
                         opt.input_size, 'test', opt.chars_list, opt.max_seq)
    dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False,
                            num_workers=opt.num_works)
    model = getattr(models, opt.model)(opt.basenet, opt.input_size,
                                       opt.max_seq, opt.num_classes,
                                       mode='test', attn=opt.attn)
    load_state(model, opt.load_model_path, "cuda:%d" % opt.gpus[0])
    model = gpu(model, opt)
    model.eval()

    t_score = 0.3
    # IDIOM FIX: renamed from `match`/`all` -- `all` shadowed the builtin.
    n_match, n_total = 0, 0
    for inputs, text in dataloader:
        inputs = gpu(inputs, opt)
        with torch.no_grad():
            outputs = model(inputs)
        outputs = word_format(outputs, t_score)
        outputs = outputs[0].detach().cpu().numpy()
        # Keep only rows with a nonzero max score, then drop blank (0) labels.
        outputs = outputs[np.where(np.max(outputs, 1) != 0)[0]]
        idx = np.argmax(outputs, 1)
        idx = idx[np.where(idx != 0)[0]]
        preds = ''.join([opt.chars_list[i] for i in idx])
        text = text[0]
        if text == preds:
            n_match += 1
        else:
            print('text/pred:%s,%s' % (text, preds))
        n_total += 1
        torch.cuda.empty_cache()
    # BUGFIX: avoid ZeroDivisionError on an empty test set.
    accuracy = n_match / n_total if n_total else 0.0
    print('match/all(%2f): %d/%d' % (accuracy, n_match, n_total))
def update_search_results(id_submits, name_submits, n_clicks, database_update,
                          bucket_id, bucket_name, previous_search):
    """Run a bucket search and return [saved search state, rendered table]."""
    # Both stores are loaded every call (load_state may have side effects),
    # even though the current implementation searches unconditionally.
    previous_search = load_state(previous_search)
    database_update = load_state(database_update)

    search_results = results.search(bucket_id, bucket_name)
    state = save_state({'data': search_results})
    layout = results.generate_result_table(search_results)
    return [state, layout]
def merge_bucket(n_clicks, new_bucket_id, merge_state):
    """Merge the currently selected bucket into new_bucket_id when valid.

    Returns the saved {'status': bool} state.
    """
    state = load_state(merge_state)
    status = False
    old_bucket_id = None
    if merge.validate(new_bucket_id):
        old_bucket_id = state.get("bucket_id")
        status = merge.merge(old_bucket_id, new_bucket_id)
    # print(status)
    return save_state({
        'status': status,
    })
def train(train, model, criterion, optimizer, n_lettres, n_epochs, log_dir,
          checkpoint_path):
    """Train a character-level RNN, checkpointing state after every epoch.

    Returns the list of mean per-epoch losses.
    NOTE(review): parameter `train` (the dataloader) shadows this function's
    own name; `n_lettres` is unused here.
    """
    losses = []
    writer = SummaryWriter(log_dir=log_dir)
    pbar = tqdm(range(n_epochs), total=n_epochs, file=sys.stdout)
    # Resume (or create) bundled training state: model/optimizer/epoch/iteration.
    state = load_state(checkpoint_path, model, optimizer)
    for i in pbar:
        l = []
        for x, y in train:
            # (batch, seq, emb, 1) -> (seq, batch, emb); assumes x carries a
            # trailing singleton dim -- TODO confirm against the dataset.
            x = x.squeeze(-1).permute(1, 0, -1).to(device)
            seq_len, batch_size, embeding = x.shape
            y = y.view(seq_len * batch_size).to(device)
            o = state.model(x, state.model.initHidden(batch_size).to(device))
            d = state.model.decode(o).view(seq_len * batch_size, embeding)
            loss = criterion(d, y)
            loss.backward()
            state.optimizer.step()
            state.optimizer.zero_grad()
            l.append(loss.item())
            state.iteration += 1
        # NOTE(review): the collapsed source is ambiguous about nesting; the
        # epoch increment and checkpoint save are assumed per-epoch -- confirm.
        state.epoch += 1
        save_state(checkpoint_path, state)
        lo = np.mean(l)
        losses.append(lo)
        # \tTest: Loss: {np.round(test_lo, 4)}
        pbar.set_description(f'Train: Loss: {np.round(lo, 4)}')
        writer.add_scalar('Loss/train', lo, i)
    return losses
def respond_to_validity(modal_validity):
    """Map the modal's validity state onto its UI control settings.

    Returns (update_disabled, update_button_color, bucket_name_value,
    bucket_name_disabled, bucket_name_warning).
    """
    state = load_state(modal_validity)
    validity = state.get("validity", INVALID)
    new_bucket_name = state.get("bucket_name")

    # Update is blocked only while the entry is invalid.
    disabled = validity == INVALID
    # The name field is editable only for a brand-new bucket.
    new_bucket_name_disabled = validity != NEW
    new_bucket_warning = ("Please choose a name for your new bucket."
                         if validity == NEW else "")

    if validity == EXISTS:
        update_button_color = "btn-success"
    elif validity == NEW:
        update_button_color = "btn-warning"
    else:
        update_button_color = "btn-danger"

    return (disabled,
            update_button_color,
            new_bucket_name if new_bucket_name is not None else "",
            new_bucket_name_disabled,
            new_bucket_warning)
def __init__(self,
             depth=50,
             pretrained=True,
             # pretrained2d=True,
             num_stages=4,
             spatial_strides=(1, 2, 2, 2),
             temporal_strides=(1, 1, 1, 1),
             dilations=(1, 1, 1, 1),
             out_indices=[3],
             conv1_kernel_t=5,
             conv1_stride_t=2,
             pool1_kernel_t=1,
             pool1_stride_t=2,
             style='pytorch',
             frozen_stages=-1,
             inflate_freq=((1, 1, 1), (1, 0, 1, 0), (1, 0, 1, 0, 1, 0),
                           (0, 1, 0)),  # For C2D baseline, this is set to -1.
             inflate_stride=(1, 1, 1, 1),
             inflate_style='3x1x1',
             nonlocal_stages=(-1, ),
             nonlocal_freq=(0, 0, 0, 0),
             # nonlocal_freq=(0, 1, 1, 0),  # Default setting
             nonlocal_cfg=None,
             bn_eval=False,
             bn_frozen=False,
             partial_bn=False,
             with_cp=False,
             num_classes=11):
    """Inflated-3D ResNet backbone with a classification head.

    Builds conv1/bn1/relu/maxpool stem, the residual stages produced by
    make_res_layer, pooling modules, and a ClsHead, then Xavier-initializes
    and optionally loads a bundled Kinetics-pretrained checkpoint.
    """
    super(Res_I3D, self).__init__()
    if depth not in self.arch_settings:
        raise KeyError('invalid depth {} for resnet'.format(depth))
    self.depth = depth
    self.pretrained = pretrained
    # self.pretrained2d = pretrained2d
    self.num_stages = num_stages
    assert num_stages >= 1 and num_stages <= 4
    self.spatial_strides = spatial_strides
    self.temporal_strides = temporal_strides
    self.dilations = dilations
    assert len(spatial_strides) == len(temporal_strides) == len(
        dilations) == num_stages
    self.out_indices = out_indices
    assert max(out_indices) < num_stages
    self.style = style
    self.frozen_stages = frozen_stages
    # A scalar frequency is broadcast to one entry per stage.
    self.inflate_freqs = inflate_freq if not isinstance(
        inflate_freq, int) else (inflate_freq, ) * num_stages
    self.inflate_style = inflate_style
    self.nonlocal_stages = nonlocal_stages
    self.nonlocal_freqs = nonlocal_freq if not isinstance(
        nonlocal_freq, int) else (nonlocal_freq, ) * num_stages
    self.nonlocal_cfg = nonlocal_cfg
    self.bn_eval = bn_eval
    self.bn_frozen = bn_frozen
    self.partial_bn = partial_bn
    self.with_cp = with_cp

    self.block, stage_blocks = self.arch_settings[depth]
    self.stage_blocks = stage_blocks[:num_stages]
    self.inplanes = 64

    # Stem: temporal kernel/stride configurable, spatial 7x7 stride-2 conv.
    self.conv1 = nn.Conv3d(3,
                           64,
                           kernel_size=(conv1_kernel_t, 7, 7),
                           stride=(conv1_stride_t, 2, 2),
                           padding=((conv1_kernel_t - 1) // 2, 3, 3),
                           bias=False)
    self.bn1 = nn.BatchNorm3d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool3d(kernel_size=(pool1_kernel_t, 3, 3),
                                stride=(pool1_stride_t, 2, 2),
                                padding=(pool1_kernel_t // 2, 1, 1))
    #TODO: Check whether pad=0 differs a lot
    self.pool2 = nn.MaxPool3d(kernel_size=(2, 1, 1),
                              stride=(2, 1, 1),
                              padding=(0, 0, 0))
    self.avgpool = SimpleSpatialTemporalModule(spatial_type='avg',
                                               temporal_size=2,
                                               spatial_size=7)
    self.cls_head = ClsHead(with_avg_pool=False,
                            temporal_feature_size=1,
                            spatial_feature_size=1,
                            dropout_ratio=0.5,
                            in_channels=2048,
                            num_classes=num_classes)
    # Residual stages are registered as submodules layer1..layerN.
    self.res_layers = []
    for i, num_blocks in enumerate(self.stage_blocks):
        spatial_stride = spatial_strides[i]
        temporal_stride = temporal_strides[i]
        dilation = dilations[i]
        planes = 64 * 2**i
        res_layer = make_res_layer(self.block,
                                   self.inplanes,
                                   planes,
                                   num_blocks,
                                   spatial_stride=spatial_stride,
                                   temporal_stride=temporal_stride,
                                   dilation=dilation,
                                   style=self.style,
                                   inflate_freq=self.inflate_freqs[i],
                                   inflate_style=self.inflate_style,
                                   nonlocal_freq=self.nonlocal_freqs[i],
                                   nonlocal_cfg=self.nonlocal_cfg
                                   if i in self.nonlocal_stages else None,
                                   with_cp=with_cp)
        self.inplanes = planes * self.block.expansion
        layer_name = 'layer{}'.format(i + 1)
        self.add_module(layer_name, res_layer)
        self.res_layers.append(layer_name)

    self.feat_dim = self.block.expansion * 64 * 2**(
        len(self.stage_blocks) - 1)

    #############
    # Initialization
    initializer.xavier(net=self)
    if pretrained:
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/i3d_kinetics_rgb_r50_c3d.pth')
        logging.info(
            "Network:: graph initialized, loading pretrained model: `{}'".
            format(pretrained_model))
        assert os.path.exists(
            pretrained_model), "cannot locate: `{}'".format(
                pretrained_model)
        pretrained = torch.load(pretrained_model)
        load_state(self, pretrained['state_dict'])
    else:
        logging.info(
            "Network:: graph initialized, use random inilization!")
def main():
    """Train (or evaluate) an ImageNet-style classifier driven by a YAML config.

    Reads args from the CLI, merges every section of the YAML config into
    args, optionally sets up distributed training, auto-resumes from
    model_dir, and runs the epoch loop with a multi-scale loader for all
    but the final five epochs.
    """
    global args, best_prec1
    args = parser.parse_args()
    with open(args.config) as f:
        # NOTE(review): yaml.load without Loader= is deprecated/unsafe on
        # PyYAML >= 5.1 -- confirm the pinned version.
        config = yaml.load(f)
    for key in config:
        for k, v in config[key].items():
            setattr(args, k, v)
    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    #print("=> creating model '{}'".format(args.model))
    #if 'se_resnext50_32x4d_v1_sn' in args.model:
    #    model = models.__dict__[args.model](using_moving_average = args.using_moving_average, last_gamma=args.last_gamma)
    #else:
    #    model = models.__dict__[args.model](using_moving_average=args.using_moving_average)
    #model = resnet18()
    model = ResNet18()
    #model = SENet18()

    if not args.distributed:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.base_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # auto resume from a checkpoint
    model_dir = args.model_dir
    start_epoch = 0
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if args.evaluate:
        utils.load_state_ckpt(args.checkpoint_path, model)
    else:
        best_prec1, start_epoch = utils.load_state(model_dir, model,
                                                   optimizer=optimizer)
    writer = SummaryWriter(model_dir)

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, 0, writer)
        return

    # Two training pipelines: random-resized crops ("multi scale") and
    # plain center crops; the plain one is used for the last five epochs.
    train_dataset_multi_scale = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            ColorAugmentation(),
            normalize,
        ]))
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            ColorAugmentation(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader_multi_scale = torch.utils.data.DataLoader(
        train_dataset_multi_scale, batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    if not args.using_moving_average:
        # Helper loader used to recompute switchable-norm statistics.
        train_dataset_snhelper = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ]))
        train_loader_snhelper = torch.utils.data.DataLoader(
            train_dataset_snhelper,
            batch_size=args.batch_size * torch.cuda.device_count(),
            shuffle=(train_sampler is None),
            #train_dataset_snhelper, batch_size=1, shuffle=(train_sampler is None),
            num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    niters = len(train_loader)
    lr_scheduler = LRScheduler(optimizer, niters, args)

    for epoch in range(start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        if epoch < args.epochs - 5:
            train(train_loader_multi_scale, model, criterion, optimizer,
                  lr_scheduler, epoch, writer)
        else:
            train(train_loader, model, criterion, optimizer, lr_scheduler,
                  epoch, writer)

        if not args.using_moving_average:
            sn_helper(train_loader_snhelper, model)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch, writer)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        utils.save_checkpoint(model_dir, {
            'epoch': epoch + 1,
            'model': args.model,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, is_best)
def main():
    """Evaluate a student model on the VisDA validation split, both
    single-crop and 16-way multi-crop."""
    global args, best_prec1
    args = parser.parse_args()
    with open(args.config) as f:
        config = yaml.load(f)
    for k, v in config['common'].items():
        setattr(args, k, v)
    torch.cuda.manual_seed(int(time.time()) % 1000)

    # create model
    print("=> creating model '{}'".format(args.arch))
    if args.arch.startswith('inception_v3'):
        print('inception_v3 without aux_logits!')
        image_size = 341
        input_size = 299
        # NOTE(review): this branch assigns `model`, but everything below
        # uses `student_model` -- an inception_v3 arch would raise
        # NameError; confirm intent.
        model = models.__dict__[args.arch](aux_logits=True,
                                           num_classes=1000,
                                           pretrained=args.pretrained)
    else:
        image_size = 182
        input_size = 160
        student_model = models.__dict__[args.arch](
            num_classes=args.num_classes,
            pretrained=args.pretrained,
            avgpool_size=input_size / 32)
    student_model.cuda()
    student_params = list(student_model.parameters())
    student_optimizer = torch.optim.Adam(student_model.parameters(),
                                         args.base_lr * 0.1)

    args.save_path = "checkpoint/" + args.exp_name
    if not osp.exists(args.save_path):
        os.mkdir(args.save_path)
    tb_logger = SummaryWriter(args.save_path)
    logger = create_logger('global_logger', args.save_path + '/log.txt')
    for key, val in vars(args).items():
        logger.info("{:16} {}".format(key, val))

    criterion = nn.CrossEntropyLoss()
    print("Build network")
    last_iter = -1
    best_prec1 = 0
    # Restore the best checkpoint before evaluating.
    load_state(args.save_path + "/ckptmodel_best.pth.tar", student_model)
    cudnn.benchmark = True

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    se_normalize = se_transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                           std=[0.229, 0.224, 0.225])
    border_value = int(np.mean([0.485, 0.456, 0.406]) * 255 + 0.5)
    test_aug = se_transforms.ImageAugmentation(
        True, 0, rot_std=0.0, scale_u_range=[0.75, 1.333],
        affine_std=0, scale_x_range=None, scale_y_range=None)
    val_dataset = NormalDataset(
        args.val_root,
        "./data/visda/list/validation_list.txt",
        transform=transforms.Compose([
            se_transforms.ScaleAndCrop(
                (input_size, input_size), args.padding, False,
                np.array([0.485, 0.456, 0.406]),
                np.array([0.229, 0.224, 0.225]))
        ]),
        is_train=False,
        args=args)
    val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False,
                            num_workers=args.workers)
    val_multi_dataset = NormalDataset(
        args.val_root,
        "./data/visda/list/validation_list.txt",
        transform=transforms.Compose([
            se_transforms.ScaleCropAndAugmentAffineMultiple(
                16, (input_size, input_size), args.padding, True, test_aug,
                border_value,
                np.array([0.485, 0.456, 0.406]),
                np.array([0.229, 0.224, 0.225]))
        ]),
        is_train=False,
        args=args)
    val_multi_loader = DataLoader(val_multi_dataset, batch_size=1,
                                  shuffle=False, num_workers=args.workers)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        student_optimizer, args.lr_steps, args.lr_gamma)
    #logger.info('{}'.format(args))
    validate(val_loader, student_model, criterion)
    validate_multi(val_multi_loader, student_model, criterion)
# dataset dataset = load_cifar10(args.dataset_root, 'test') #dataset = None # model ranks = [] if args.is_me: ranks = [args.d_c, args.d_s1, args.d_s2] else: ranks = [args.d_c, args.d_s1] args.ranks = ranks model = WoodburyGlow(args) if args.cuda: model = model.cuda() assert args.model_path != "", (print("need to load a model")) state = load_state(args.model_path, args.cuda) model.load_state_dict(state["model"]) del state print("number of parameters: {}".format(count_parameters(model))) # begin to test inferencer = Inferencer(model, dataset, args) #inferencer.Inference() inferencer.Sample(args.n_samples, args.sample_each_row)
def main():
    """WGAN-GP sketch-colorization training loop (generator + discriminator).

    Alternates config.diters discriminator updates with one generator
    update (adversarial + VGG-feature content loss), logging to
    TensorBoard and checkpointing on FID improvements.
    """
    global args, config, X
    args = parser.parse_args()
    print(args)

    with open(args.config) as f:
        config = EasyDict(yaml.load(f))
    config.save_path = os.path.dirname(args.config)

    ####### regular set up
    assert torch.cuda.is_available()
    device = torch.device("cuda")
    config.device = device

    # random seed setup
    print("Random Seed: ", config.seed)
    random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed(config.seed)
    cudnn.benchmark = True
    ####### regular set up end

    netG = torch.nn.DataParallel(NetG(ngf=config.ngf))
    netD = torch.nn.DataParallel(NetD(ndf=config.ndf))
    netF = torch.nn.DataParallel(NetF())
    netI = torch.nn.DataParallel(NetI()).eval()
    # netF is a frozen feature extractor (content loss only).
    for param in netF.parameters():
        param.requires_grad = False

    criterion_MSE = nn.MSELoss()

    # Placeholders later resized to hold the fixed visualization batch.
    fixed_sketch = torch.tensor(0, device=device).float()
    fixed_hint = torch.tensor(0, device=device).float()
    fixed_sketch_feat = torch.tensor(0, device=device).float()

    ####################
    netD = netD.to(device)
    netG = netG.to(device)
    netF = netF.to(device)
    netI = netI.to(device)
    criterion_MSE = criterion_MSE.to(device)

    # setup optimizer
    optimizerG = optim.Adam(netG.parameters(),
                            lr=config.lr_scheduler.base_lr, betas=(0.5, 0.9))
    optimizerD = optim.Adam(netD.parameters(),
                            lr=config.lr_scheduler.base_lr, betas=(0.5, 0.9))

    last_iter = -1
    best_fid = 1e6

    if args.resume:
        best_fid, last_iter = load_state(args.resume, netG, netD,
                                         optimizerG, optimizerD)

    # The same scheduler config is reused for both optimizers.
    config.lr_scheduler['last_iter'] = last_iter
    config.lr_scheduler['optimizer'] = optimizerG
    lr_schedulerG = get_scheduler(config.lr_scheduler)
    config.lr_scheduler['optimizer'] = optimizerD
    lr_schedulerD = get_scheduler(config.lr_scheduler)

    tb_logger = SummaryWriter(config.save_path + '/events')
    logger = create_logger('global_logger', config.save_path + '/log.txt')
    logger.info(f'args: {pprint.pformat(args)}')
    logger.info(f'config: {pprint.pformat(config)}')

    batch_time = AverageMeter(config.print_freq)
    data_time = AverageMeter(config.print_freq)
    flag = 1
    mu, sigma = 1, 0.005
    X = stats.truncnorm((0 - mu) / sigma, (1 - mu) / sigma,
                        loc=mu, scale=sigma)
    i = 0
    curr_iter = last_iter + 1

    dataloader = train_loader(config)
    data_iter = iter(dataloader)

    end = time.time()
    while i < len(dataloader):
        lr_schedulerG.step(curr_iter)
        lr_schedulerD.step(curr_iter)
        current_lr = lr_schedulerG.get_lr()[0]
        ############################
        # (1) Update D network
        ###########################
        for p in netD.parameters():  # reset requires_grad
            p.requires_grad = True  # they are set to False below in netG update
        for p in netG.parameters():
            p.requires_grad = False  # to avoid computation ft_params

        # train the discriminator Diters times
        j = 0
        while j < config.diters:
            netD.zero_grad()
            i += 1
            j += 1
            data_end = time.time()
            # NOTE(review): .next() is the pre-0.4 DataLoader iterator API;
            # confirm the pinned torch version.
            real_cim, real_vim, real_sim = data_iter.next()
            data_time.update(time.time() - data_end)

            real_cim, real_vim, real_sim = real_cim.to(device), real_vim.to(device), real_sim.to(device)
            mask = mask_gen()
            hint = torch.cat((real_vim * mask, mask), 1)

            # train with fake
            with torch.no_grad():
                feat_sim = netI(real_sim).detach()
                fake_cim = netG(real_sim, hint, feat_sim).detach()

            errD_fake = netD(fake_cim, feat_sim)
            errD_fake = errD_fake.mean(0).view(1)
            errD_fake.backward(retain_graph=True)  # backward on score on real

            errD_real = netD(real_cim, feat_sim)
            errD_real = errD_real.mean(0).view(1)
            errD = errD_real - errD_fake

            errD_realer = -1 * errD_real + errD_real.pow(2) * config.drift
            errD_realer.backward(retain_graph=True)  # backward on score on real

            gradient_penalty = calc_gradient_penalty(netD, real_cim,
                                                     fake_cim, feat_sim)
            gradient_penalty.backward()

            optimizerD.step()

        ############################
        # (2) Update G network
        ############################
        for p in netD.parameters():
            p.requires_grad = False  # to avoid computation
        for p in netG.parameters():
            p.requires_grad = True
        netG.zero_grad()

        data = data_iter.next()
        real_cim, real_vim, real_sim = data
        i += 1
        real_cim, real_vim, real_sim = real_cim.to(device), real_vim.to(device), real_sim.to(device)

        if flag:  # fix samples
            # First generator step: capture a fixed batch for periodic
            # qualitative logging.
            mask = mask_gen()
            hint = torch.cat((real_vim * mask, mask), 1)
            with torch.no_grad():
                feat_sim = netI(real_sim).detach()
            tb_logger.add_image('target imgs',
                                vutils.make_grid(real_cim.mul(0.5).add(0.5), nrow=4))
            tb_logger.add_image('sketch imgs',
                                vutils.make_grid(real_sim.mul(0.5).add(0.5), nrow=4))
            tb_logger.add_image('hint',
                                vutils.make_grid((real_vim * mask).mul(0.5).add(0.5), nrow=4))
            fixed_sketch.resize_as_(real_sim).copy_(real_sim)
            fixed_hint.resize_as_(hint).copy_(hint)
            fixed_sketch_feat.resize_as_(feat_sim).copy_(feat_sim)
            flag -= 1

        mask = mask_gen()
        hint = torch.cat((real_vim * mask, mask), 1)
        with torch.no_grad():
            feat_sim = netI(real_sim).detach()

        fake = netG(real_sim, hint, feat_sim)

        errd = netD(fake, feat_sim)
        errG = errd.mean() * config.advW * -1
        errG.backward(retain_graph=True)
        feat1 = netF(fake)
        with torch.no_grad():
            feat2 = netF(real_cim)
        contentLoss = criterion_MSE(feat1, feat2)
        contentLoss.backward()
        optimizerG.step()
        batch_time.update(time.time() - end)

        ############################
        # (3) Report & 100 Batch checkpoint
        ############################
        curr_iter += 1
        if curr_iter % config.print_freq == 0:
            tb_logger.add_scalar('VGG MSE Loss', contentLoss.item(), curr_iter)
            tb_logger.add_scalar('wasserstein distance', errD.item(), curr_iter)
            tb_logger.add_scalar('errD_real', errD_real.item(), curr_iter)
            tb_logger.add_scalar('errD_fake', errD_fake.item(), curr_iter)
            tb_logger.add_scalar('Gnet loss toward real', errG.item(), curr_iter)
            tb_logger.add_scalar('gradient_penalty', gradient_penalty.item(), curr_iter)
            tb_logger.add_scalar('lr', current_lr, curr_iter)
            logger.info(f'Iter: [{curr_iter}/{len(dataloader)//(config.diters+1)}]\t'
                        f'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        f'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                        f'errG {errG.item():.4f}\t'
                        f'errD {errD.item():.4f}\t'
                        f'err_D_real {errD_real.item():.4f}\t'
                        f'err_D_fake {errD_fake.item():.4f}\t'
                        f'content loss {contentLoss.item():.4f}\t'
                        f'LR {current_lr:.4f}')

        if curr_iter % config.print_img_freq == 0:
            with torch.no_grad():
                fake = netG(fixed_sketch, fixed_hint, fixed_sketch_feat)
                tb_logger.add_image('colored imgs',
                                    vutils.make_grid(fake.detach().mul(0.5).add(0.5), nrow=4),
                                    curr_iter)

        if curr_iter % config.val_freq == 0:
            fid, var = validate(netG, netI)
            tb_logger.add_scalar('fid_val', fid, curr_iter)
            tb_logger.add_scalar('fid_variance', var, curr_iter)
            logger.info(f'fid: {fid:.3f} ({var})\t')
            # remember best fid and save checkpoint
            is_best = fid < best_fid
            best_fid = min(fid, best_fid)
            save_checkpoint({
                'step': curr_iter - 1,
                'state_dictG': netG.state_dict(),
                'state_dictD': netD.state_dict(),
                'best_fid': best_fid,
                'optimizerG': optimizerG.state_dict(),
                'optimizerD': optimizerD.state_dict(),
            }, is_best, config.save_path + '/ckpt')

        end = time.time()
import matplotlib.pyplot as plt

from utils import load_state, load_train_logger

if __name__ == '__main__':
    # Load a saved checkpoint and plot the per-epoch training loss it recorded.
    checkpoint = load_state('checkpoints/stn7/epoch_20.pth')
    logger = load_train_logger(checkpoint)

    loss_history = logger.epoch_losses()
    epoch_axis = range(1, logger.n_epoches() + 1)

    plt.plot(epoch_axis, loss_history)
    plt.xticks(epoch_axis)
    plt.xlabel('epoches')
    plt.ylabel('loss')
    plt.show()
matches.update(match) discovered_matchIds[matchId] = True # Sleep to stay under the API data rate limit time.sleep(TIME_SLEEP) if loop_count % CHECKPOINT_INTERVAL == 0: # Save data every CHECKPOINT_INTERVAL number of summonerIds checkpoint_num += 1 utils.save_state(checkpoint_num, matches, discovered_summonerIds, discovered_matchIds, g, max_hop, bfs_queue, hop) return "" if __name__ == '__main__': rg = riot_games_api.RiotGames('lol/riot_games_api.key') # Read parameters from terminal summonerId = int(sys.argv[1]) checkpoint_num = int(sys.argv[2]) print "Starting at summonerId %d and checkpoint num %d ..." % (summonerId, checkpoint_num) # Initialize or load checkpoint data if checkpoint_num == -1: matches, discovered_summonerIds, discovered_matchIds, g, max_hop, bfs_queue, hop = initialize(summonerId) else: matches, discovered_summonerIds, discovered_matchIds, g, max_hop, bfs_queue, hop = utils.load_state(checkpoint_num) # They call it a mine! A MINE!! mine(checkpoint_num, matches, discovered_summonerIds, discovered_matchIds, g, max_hop, bfs_queue, hop)
def main():
    """Distributed training entry point.

    Parses CLI args, loads the YAML config, builds the model (with
    optional sync-BN grouping and FP16), datasets/samplers/loaders,
    optimizer and LR scheduler, optionally resumes from a checkpoint,
    then either runs a one-off evaluation (``args.evaluate``) or calls
    ``train``. Calls ``link.finalize()`` before returning.
    """
    global args, config, best_prec1
    args = parser.parse_args()

    with open(args.config) as f:
        # NOTE(review): loader-less yaml.load is deprecated/unsafe in
        # PyYAML >= 5.1 — consider yaml.safe_load; confirm config needs.
        config = yaml.load(f)
    config = EasyDict(config['common'])
    # Checkpoints/logs/events are written next to the config file.
    config.save_path = os.path.dirname(args.config)

    rank, world_size = dist_init()

    # create model
    # bn_group_size controls how many ranks share sync-BN statistics;
    # 1 means plain (non-grouped) BN.
    bn_group_size = config.model.kwargs.bn_group_size
    bn_var_mode = config.model.kwargs.get('bn_var_mode', 'L2')
    if bn_group_size == 1:
        bn_group = None
    else:
        assert world_size % bn_group_size == 0
        bn_group = simple_group_split(world_size, rank,
                                      world_size // bn_group_size)

    config.model.kwargs.bn_group = bn_group
    config.model.kwargs.bn_var_mode = (link.syncbnVarMode_t.L1
                                       if bn_var_mode == 'L1'
                                       else link.syncbnVarMode_t.L2)
    model = model_entry(config.model)
    if rank == 0:
        print(model)

    model.cuda()

    # FP16 is implied by the optimizer type, not a separate flag.
    if config.optimizer.type == 'FP16SGD' or config.optimizer.type == 'FusedFP16SGD':
        args.fp16 = True
    else:
        args.fp16 = False

    if args.fp16:
        # if you have modules that must use fp32 parameters, and need fp32 input
        # try use link.fp16.register_float_module(your_module)
        # if you only need fp32 parameters set cast_args=False when call this
        # function, then call link.fp16.init() before call model.half()
        if config.optimizer.get('fp16_normal_bn', False):
            print('using normal bn for fp16')
            link.fp16.register_float_module(link.nn.SyncBatchNorm2d,
                                            cast_args=False)
            link.fp16.register_float_module(torch.nn.BatchNorm2d,
                                            cast_args=False)
        link.fp16.init()
        model.half()

    model = DistModule(model, args.sync)

    # create optimizer
    opt_config = config.optimizer
    opt_config.kwargs.lr = config.lr_scheduler.base_lr
    if config.get('no_wd', False):
        # Separate param groups so BN/bias get no weight decay.
        param_group, type2num = param_group_no_wd(model)
        opt_config.kwargs.params = param_group
    else:
        opt_config.kwargs.params = model.parameters()

    optimizer = optim_entry(opt_config)

    # optionally resume from a checkpoint
    last_iter = -1
    best_prec1 = 0
    if args.load_path:
        if args.recover:
            # Full resume: restores optimizer state and iteration count.
            best_prec1, last_iter = load_state(args.load_path, model,
                                               optimizer=optimizer)
        else:
            # Weights-only load (fine-tuning / evaluation).
            load_state(args.load_path, model)

    cudnn.benchmark = True

    # Data loading code
    # Standard ImageNet channel statistics.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # augmentation
    aug = [transforms.RandomResizedCrop(config.augmentation.input_size),
           transforms.RandomHorizontalFlip()]

    # Reject unknown augmentation keys early (typo guard).
    for k in config.augmentation.keys():
        assert k in ['input_size', 'test_resize', 'rotation', 'colorjitter',
                     'colorold']

    rotation = config.augmentation.get('rotation', 0)
    colorjitter = config.augmentation.get('colorjitter', None)
    colorold = config.augmentation.get('colorold', False)

    if rotation > 0:
        aug.append(transforms.RandomRotation(rotation))

    if colorjitter is not None:
        aug.append(transforms.ColorJitter(*colorjitter))

    aug.append(transforms.ToTensor())

    # "colorold": legacy PCA-style color augmentation applied after ToTensor.
    if colorold:
        aug.append(ColorAugmentation())

    aug.append(normalize)

    # train
    train_dataset = McDataset(config.train_root, config.train_source,
                              transforms.Compose(aug), fake=args.fake)

    # val
    val_dataset = McDataset(
        config.val_root,
        config.val_source,
        transforms.Compose([
            transforms.Resize(config.augmentation.test_resize),
            transforms.CenterCrop(config.augmentation.input_size),
            transforms.ToTensor(),
            normalize,
        ]), args.fake)

    # Iteration-based sampler: the epoch concept is replaced by max_iter,
    # and last_iter lets a resumed run skip already-consumed samples.
    train_sampler = DistributedGivenIterationSampler(
        train_dataset,
        config.lr_scheduler.max_iter,
        config.batch_size,
        last_iter=last_iter)
    val_sampler = DistributedSampler(val_dataset, round_up=False)

    train_loader = DataLoader(train_dataset,
                              batch_size=config.batch_size,
                              shuffle=False,
                              num_workers=config.workers,
                              pin_memory=True,
                              sampler=train_sampler)

    val_loader = DataLoader(val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            num_workers=config.workers,
                            pin_memory=True,
                            sampler=val_sampler)

    # FP16SGD wraps the real optimizer; the scheduler must see the inner one.
    config.lr_scheduler['optimizer'] = optimizer.optimizer if isinstance(
        optimizer, FP16SGD) else optimizer
    config.lr_scheduler['last_iter'] = last_iter
    lr_scheduler = get_scheduler(config.lr_scheduler)

    # Only rank 0 writes TensorBoard events and the text log.
    if rank == 0:
        tb_logger = SummaryWriter(config.save_path + '/events')
        logger = create_logger('global_logger', config.save_path + '/log.txt')
        logger.info('args: {}'.format(pprint.pformat(args)))
        logger.info('config: {}'.format(pprint.pformat(config)))
    else:
        tb_logger = None

    if args.evaluate:
        if args.fusion_list is not None:
            # Evaluate an ensemble of checkpoints instead of `model`.
            validate(val_loader, model,
                     fusion_list=args.fusion_list,
                     fuse_prob=args.fuse_prob)
        else:
            validate(val_loader, model)
        link.finalize()
        return

    train(train_loader, val_loader, model, optimizer, lr_scheduler,
          last_iter + 1, tb_logger)

    link.finalize()
def validate(val_loader, model, fusion_list=None, fuse_prob=False):
    """Evaluate on ``val_loader`` and return globally-reduced metrics.

    Args:
        val_loader: validation DataLoader (distributed sampler assumed
            upstream — each rank sees its own shard).
        model: the (distributed) model to evaluate; ignored when
            ``fusion_list`` is given.
        fusion_list: optional list of checkpoint paths; when set, one
            model per checkpoint is built and their outputs averaged.
        fuse_prob: if True, average softmax probabilities instead of
            raw logits when fusing.

    Returns:
        (final_loss, final_top1, final_top5) — averaged over the total
        number of samples across all ranks via ``link.allreduce``.

    Side effects: puts ``model`` back into train mode before returning.
    Uses globals ``config`` and ``args`` (print_freq, sync, fp16).
    """
    batch_time = AverageMeter(0)
    losses = AverageMeter(0)
    top1 = AverageMeter(0)
    top5 = AverageMeter(0)

    # switch to evaluate mode
    if fusion_list is not None:
        # Build one model per checkpoint in the fusion ensemble.
        model_list = []
        for i in range(len(fusion_list)):
            model_list.append(model_entry(config.model))
            model_list[i].cuda()
            model_list[i] = DistModule(model_list[i], args.sync)
            load_state(fusion_list[i], model_list[i])
            model_list[i].eval()
        if fuse_prob:
            softmax = nn.Softmax(dim=1)
    else:
        model.eval()

    rank = link.get_rank()
    world_size = link.get_world_size()

    logger = logging.getLogger('global_logger')

    criterion = nn.CrossEntropyLoss()

    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            # NOTE: `input` shadows the builtin; kept as-is.
            input = input.cuda() if not args.fp16 else input.half().cuda()
            target = target.cuda()
            # compute output
            if fusion_list is not None:
                output_list = []
                for model_idx in range(len(fusion_list)):
                    output = model_list[model_idx](input)
                    if fuse_prob:
                        output = softmax(output)
                    output_list.append(output)
                # Mean over ensemble members (logits or probabilities).
                output = torch.stack(output_list, 0)
                output = torch.mean(output, 0)
            else:
                output = model(input)

            # measure accuracy and record loss
            loss = criterion(
                output, target
            )  #/ world_size ## loss should not be scaled here, it's reduced later!
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))

            num = input.size(0)
            losses.update(loss.item(), num)
            top1.update(prec1.item(), num)
            top5.update(prec5.item(), num)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % config.print_freq == 0 and rank == 0:
                logger.info(
                    'Test: [{0}/{1}]\tTime {batch_time.val:.3f} ({batch_time.avg:.3f})'
                    .format(i, len(val_loader), batch_time=batch_time))

    # gather final results
    # Convert per-rank sums to tensors so they can be allreduced, then
    # divide by the global sample count for the true global averages.
    total_num = torch.Tensor([losses.count])
    loss_sum = torch.Tensor([losses.avg * losses.count])
    top1_sum = torch.Tensor([top1.avg * top1.count])
    top5_sum = torch.Tensor([top5.avg * top5.count])
    link.allreduce(total_num)
    link.allreduce(loss_sum)
    link.allreduce(top1_sum)
    link.allreduce(top5_sum)
    final_loss = loss_sum.item() / total_num.item()
    final_top1 = top1_sum.item() / total_num.item()
    final_top5 = top5_sum.item() / total_num.item()
    if rank == 0:
        logger.info(
            ' * Prec@1 {:.3f}\tPrec@5 {:.3f}\tLoss {:.3f}\ttotal_num={}'.
            format(final_top1, final_top5, final_loss, total_num.item()))

    model.train()
    return final_loss, final_top1, final_top5
def main():
    """Multi-task training/evaluation/extraction driver.

    Reads a YAML config into ``args``, then depending on flags:
    builds train/val loaders (one per task), test loaders, or an
    extraction loader; creates the multi-task model and SGD optimizer;
    optionally resumes; then runs offline evaluation, feature
    extraction, or the epoch training loop with periodic checkpoint,
    validation and online evaluation.

    Fixes vs. previous revision:
      * val_dataset comprehension iterated ``idx`` while indexing with
        ``i`` (a leftover loop variable), so every val task loaded the
        last task's list/root — now indexes consistently with ``i``.
      * online-evaluate TensorBoard scalar was logged at the constant
        ``start_epoch`` step — now logged at ``epoch + 1`` to match the
        log message and checkpoint filename.
    """
    ## config
    global args
    args = parser.parse_args()
    with open(args.config) as f:
        # NOTE(review): loader-less yaml.load is deprecated in PyYAML
        # >= 5.1; consider yaml.safe_load if the config is plain YAML.
        config = yaml.load(f)
    # Flatten the config onto `args`: dict values become ArgObj
    # namespaces (args.train.lr, ...), scalars become plain attributes.
    for k, v in config.items():
        if isinstance(v, dict):
            argobj = ArgObj()
            setattr(args, k, argobj)
            for kk, vv in v.items():
                setattr(argobj, kk, vv)
        else:
            setattr(args, k, v)
    args.ngpu = len(args.gpus.split(','))

    ## asserts — each task needs a matching entry in every per-task list
    assert args.model.backbone in model_names, "available backbone names: {}".format(model_names)
    num_tasks = len(args.train.data_root)
    assert(num_tasks == len(args.train.loss_weight))
    assert(num_tasks == len(args.train.batch_size))
    assert(num_tasks == len(args.train.data_list))
    #assert(num_tasks == len(args.train.data_meta))
    if args.val.flag:
        assert(num_tasks == len(args.val.batch_size))
        assert(num_tasks == len(args.val.data_root))
        assert(num_tasks == len(args.val.data_list))
        #assert(num_tasks == len(args.val.data_meta))

    ## mkdir
    if not hasattr(args, 'save_path'):
        args.save_path = os.path.dirname(args.config)
    if not os.path.isdir('{}/checkpoints'.format(args.save_path)):
        os.makedirs('{}/checkpoints'.format(args.save_path))
    if not os.path.isdir('{}/logs'.format(args.save_path)):
        os.makedirs('{}/logs'.format(args.save_path))
    if not os.path.isdir('{}/events'.format(args.save_path)):
        os.makedirs('{}/events'.format(args.save_path))

    ## create dataset
    if not (args.extract or args.evaluate):  # train + val
        # Per-task batch sizes are per-GPU in the config; scale to total.
        for i in range(num_tasks):
            args.train.batch_size[i] *= args.ngpu
        #train_dataset = [FaceDataset(args, idx, 'train') for idx in range(num_tasks)]
        train_dataset = [FileListLabeledDataset(
            args.train.data_list[i], args.train.data_root[i],
            transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.Resize(args.model.input_size),
                transforms.ToTensor(),
                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),]),
            memcached=args.memcached,
            memcached_client=args.memcached_client) for i in range(num_tasks)]
        args.num_classes = [td.num_class for td in train_dataset]
        # Pad every task's sampler to the longest task so the loaders
        # stay in lock-step (equal number of batches per task).
        train_longest_size = max([int(np.ceil(len(td) / float(bs)))
                                  for td, bs in zip(train_dataset, args.train.batch_size)])
        train_sampler = [GivenSizeSampler(
            td, total_size=train_longest_size * bs, rand_seed=args.train.rand_seed)
            for td, bs in zip(train_dataset, args.train.batch_size)]
        train_loader = [DataLoader(
            train_dataset[k], batch_size=args.train.batch_size[k], shuffle=False,
            num_workers=args.workers, pin_memory=False, sampler=train_sampler[k])
            for k in range(num_tasks)]
        assert(all([len(train_loader[k]) == len(train_loader[0]) for k in range(num_tasks)]))

        if args.val.flag:
            for i in range(num_tasks):
                args.val.batch_size[i] *= args.ngpu
            #val_dataset = [FaceDataset(args, idx, 'val') for idx in range(num_tasks)]
            # FIX: iterate `i` (was `for idx in range(num_tasks)` while the
            # body indexed with `i`, so all tasks got the last task's data).
            val_dataset = [FileListLabeledDataset(
                args.val.data_list[i], args.val.data_root[i],
                transforms.Compose([
                    transforms.Resize(args.model.input_size),
                    transforms.ToTensor(),
                    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),]),
                memcached=args.memcached,
                memcached_client=args.memcached_client) for i in range(num_tasks)]
            val_longest_size = max([int(np.ceil(len(vd) / float(bs)))
                                    for vd, bs in zip(val_dataset, args.val.batch_size)])
            val_sampler = [GivenSizeSampler(vd, total_size=val_longest_size * bs, sequential=True)
                           for vd, bs in zip(val_dataset, args.val.batch_size)]
            val_loader = [DataLoader(
                val_dataset[k], batch_size=args.val.batch_size[k], shuffle=False,
                num_workers=args.workers, pin_memory=False, sampler=val_sampler[k])
                for k in range(num_tasks)]
            assert(all([len(val_loader[k]) == len(val_loader[0]) for k in range(num_tasks)]))

    if args.test.flag or args.evaluate:  # online or offline evaluate
        args.test.batch_size *= args.ngpu
        test_dataset = []
        for tb in args.test.benchmark:
            if tb == 'megaface':
                test_dataset.append(FileListDataset(
                    args.test.megaface_list, args.test.megaface_root,
                    transforms.Compose([
                        transforms.Resize(args.model.input_size),
                        transforms.ToTensor(),
                        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),])))
            else:
                test_dataset.append(BinDataset(
                    "{}/{}.bin".format(args.test.test_root, tb),
                    transforms.Compose([
                        transforms.Resize(args.model.input_size),
                        transforms.ToTensor(),
                        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
                    ])))
        # Pad to a whole number of batches; sequential order for eval.
        test_sampler = [GivenSizeSampler(
            td,
            total_size=int(np.ceil(len(td) / float(args.test.batch_size)) * args.test.batch_size),
            sequential=True, silent=True) for td in test_dataset]
        test_loader = [DataLoader(
            td, batch_size=args.test.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=False, sampler=ts)
            for td, ts in zip(test_dataset, test_sampler)]

    if args.extract:  # feature extraction
        args.extract_info.batch_size *= args.ngpu
        # extract_dataset = FaceDataset(args, 0, 'extract')
        extract_dataset = FileListDataset(
            args.extract_info.data_list, args.extract_info.data_root,
            transforms.Compose([
                transforms.Resize(args.model.input_size),
                transforms.ToTensor(),
                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),]),
            memcached=args.memcached,
            memcached_client=args.memcached_client)
        extract_sampler = GivenSizeSampler(
            extract_dataset,
            total_size=int(np.ceil(len(extract_dataset) / float(args.extract_info.batch_size)) * args.extract_info.batch_size),
            sequential=True)
        extract_loader = DataLoader(
            extract_dataset, batch_size=args.extract_info.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=False, sampler=extract_sampler)

    ## create model
    log("Creating model on [{}] gpus: {}".format(args.ngpu, args.gpus))
    if args.evaluate or args.extract:
        # No classifier heads needed when only extracting features.
        args.num_classes = None
    model = models.MultiTaskWithLoss(
        backbone=args.model.backbone, num_classes=args.num_classes,
        feature_dim=args.model.feature_dim, spatial_size=args.model.input_size,
        arc_fc=args.model.arc_fc, feat_bn=args.model.feat_bn)
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
    model = nn.DataParallel(model)
    model.cuda()
    cudnn.benchmark = True

    ## criterion and optimizer
    optimizer = torch.optim.SGD(model.parameters(), args.train.base_lr,
                                momentum=args.train.momentum,
                                weight_decay=args.train.weight_decay)

    ## resume / load model
    start_epoch = 0
    # `count` is a 1-element list so train() can mutate the global step
    # counter in place across epochs.
    count = [0]
    if args.load_path:
        assert os.path.isfile(args.load_path), "File not exist: {}".format(args.load_path)
        if args.resume:
            checkpoint = load_state(args.load_path, model, optimizer)
            start_epoch = checkpoint['epoch']
            count[0] = checkpoint['count']
        else:
            load_state(args.load_path, model)

    ## offline evaluate
    if args.evaluate:
        for tb, tl, td in zip(args.test.benchmark, test_loader, test_dataset):
            evaluation(tl, model, num=len(td),
                       outfeat_fn="{}_{}.bin".format(args.load_path[:-8], tb),
                       benchmark=tb)
        return

    ## feature extraction
    if args.extract:
        extract(extract_loader, model, num=len(extract_dataset),
                output_file="{}_{}.bin".format(args.load_path[:-8], args.extract_info.data_name))
        return

    ######################## train #################
    ## lr scheduler
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, args.train.lr_decay_steps,
        gamma=args.train.lr_decay_scale, last_epoch=start_epoch - 1)

    ## logger
    logging.basicConfig(
        filename=os.path.join('{}/logs'.format(args.save_path),
                              'log-{}-{:02d}-{:02d}_{:02d}:{:02d}:{:02d}.txt'.format(
                                  datetime.today().year, datetime.today().month,
                                  datetime.today().day, datetime.today().hour,
                                  datetime.today().minute, datetime.today().second)),
        level=logging.INFO)
    tb_logger = SummaryWriter('{}/events'.format(args.save_path))

    ## initial validate
    if args.val.flag:
        validate(val_loader, model, start_epoch, args.train.loss_weight,
                 len(train_loader[0]), tb_logger)

    ## initial evaluate
    if args.test.flag and args.test.initial_test:
        log("*************** evaluation epoch [{}] ***************".format(start_epoch))
        for tb, tl, td in zip(args.test.benchmark, test_loader, test_dataset):
            res = evaluation(tl, model, num=len(td),
                             outfeat_fn="{}/checkpoints/ckpt_epoch_{}_{}.bin".format(
                                 args.save_path, start_epoch, tb),
                             benchmark=tb)
            tb_logger.add_scalar(tb, res, start_epoch)

    ## training loop
    for epoch in range(start_epoch, args.train.max_epoch):
        lr_scheduler.step()
        for ts in train_sampler:
            ts.set_epoch(epoch)

        # train for one epoch
        train(train_loader, model, optimizer, epoch,
              args.train.loss_weight, tb_logger, count)

        # save checkpoint
        save_state({
            'epoch': epoch + 1,
            'arch': args.model.backbone,
            'state_dict': model.state_dict(),
            'optimizer' : optimizer.state_dict(),
            'count': count[0]
        }, args.save_path + "/checkpoints/ckpt_epoch", epoch + 1,
            is_last=(epoch + 1 == args.train.max_epoch))

        # validate
        if args.val.flag:
            validate(val_loader, model, epoch, args.train.loss_weight,
                     len(train_loader[0]), tb_logger, count)

        # online evaluate
        if args.test.flag and ((epoch + 1) % args.test.interval == 0 or epoch + 1 == args.train.max_epoch):
            log("*************** evaluation epoch [{}] ***************".format(epoch + 1))
            for tb, tl, td in zip(args.test.benchmark, test_loader, test_dataset):
                res = evaluation(tl, model, num=len(td),
                                 outfeat_fn="{}/checkpoints/ckpt_epoch_{}_{}.bin".format(
                                     args.save_path, epoch + 1, tb),
                                 benchmark=tb)
                # FIX: log at the current epoch step (was `start_epoch`,
                # which stacked every epoch's result on one x-value).
                tb_logger.add_scalar(tb, res, epoch + 1)