def merge_button_press(*args):
    results = load_state(args[-2])
    data = results.get("data")
    previous_clicks = load_state(args[-1]).get("clicks")
    clicks = list(args[:-2])

    button_clicked = None

    for i, click in enumerate(clicks):
        if click is None:
            clicks[i] = 0

    # If there is no previous click state yet, treat every button as unclicked.
    if not previous_clicks:
        previous_clicks = [0] * len(clicks)

    for i, (click, previous_click) in enumerate(zip(clicks, previous_clicks)):
        # print(f"{click} vs {previous_click}")
        if click > previous_click:
            button_clicked = i
            break

    state = {
        "clicks": clicks,
        "button_clicked": button_clicked,
        "bucket_id": data["bucket id"][button_clicked],
    }

    return [merge.render_modal(button_clicked, data), save_state(state)]
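These Dash callbacks round-trip their state through load_state/save_state. A minimal sketch of such helpers, assuming the state is kept as a JSON string in a hidden component (the project's real helpers may differ):

import json

def save_state(state):
    # Serialize callback state so it can sit in a hidden dcc.Store / Div.
    return json.dumps(state)

def load_state(serialized):
    # Return an empty dict while the component is still unpopulated,
    # so the .get(...) lookups in the callbacks stay safe.
    return json.loads(serialized) if serialized else {}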
Example #2
    def __init__(self,
                 num_classes,
                 pretrained=True,
                 pool_first=True,
                 **kwargs):
        super(RESNET18, self).__init__()

        self.resnet = torchvision.models.video.r3d_18(pretrained=False,
                                                      progress=False,
                                                      num_classes=num_classes,
                                                      **kwargs)

        #############
        # Initialization
        initializer.xavier(net=self)

        if pretrained:
            pretrained_model = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                'pretrained/r3d_18-b3b3357e.pth')
            logging.info(
                "Network:: graph initialized, loading pretrained model: `{}'".
                format(pretrained_model))
            assert os.path.exists(
                pretrained_model), "cannot locate: `{}'".format(
                    pretrained_model)
            pretrained = torch.load(pretrained_model)
            # load_state(self.resnet, pretrained['state_dict'])
            load_state(self.resnet, pretrained)
        else:
            logging.info(
                "Network:: graph initialized, use random inilization!")
    def load_state(self, path, Iter, resume=False):
        path = os.path.join(path, "ckpt_iter_{}.pth.tar".format(Iter))

        if resume:
            utils.load_state(path, self.model, self.optim)
        else:
            utils.load_state(path, self.model)
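Several of these snippets delegate to a project-specific utils.load_state(path, model, optimizer). A minimal sketch of what such a checkpoint loader might look like (an assumption, not the projects' actual utility):

import torch

def load_state(path, model, optimizer=None):
    # Restore model weights (and optionally optimizer state) from a
    # checkpoint produced by torch.save({...}).
    checkpoint = torch.load(path, map_location='cpu')
    model.load_state_dict(checkpoint.get('state_dict', checkpoint))
    if optimizer is not None and 'optimizer' in checkpoint:
        optimizer.load_state_dict(checkpoint['optimizer'])
    return checkpoint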
def update_modal(result_button_clicks, search_state):
    print("Updating Modal")
    result_state = load_state(result_button_clicks)
    search_state = load_state(search_state)

    button_clicked = result_state.get("button_clicked")
    df = search_state.get("data")

    return modal.render_modal(button_clicked, df)
def toggle_merge_modal(n_clicks, merge_state, database_update):
    button_clicked = load_state(merge_state).get("button_clicked")
    database_update_status = load_state(database_update).get("status")

    if database_update_status:
        return False

    if n_clicks is None and button_clicked is not None:
        return True

    elif n_clicks is not None:
        return False

    return False
Example #6
File: main.py  Project: cxmscb/RobNets
def main():
    global cfg

    cfg = Config.fromfile(args.config)

    cfg.save = '{}/{}-{}-{}'.format(cfg.save_path, cfg.model, cfg.dataset,
                                    time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(cfg.save)

    logger = utils.create_logger('global_logger', cfg.save + '/log.txt')

    if not torch.cuda.is_available():
        logger.info('no gpu device available')
        sys.exit(1)

    # Set cuda device & seed
    torch.cuda.set_device(cfg.gpu)
    np.random.seed(cfg.seed)
    cudnn.benchmark = True
    torch.manual_seed(cfg.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(cfg.seed)

    # Model
    print('==> Building model..')
    arch_code = getattr(architecture_code, cfg.model)
    net = models.model_entry(cfg, arch_code)
    net = net.cuda()

    cfg.netpara = sum(p.numel() for p in net.parameters()) / 1e6
    logger.info('config: {}'.format(pprint.pformat(cfg)))

    # Load checkpoint.
    if not Debug:
        print('==> Resuming from checkpoint..')
        utils.load_state(cfg.resume_path, net)

    # Data
    print('==> Preparing data..')

    testloader = dataset_entry(cfg)
    criterion = nn.CrossEntropyLoss()
    net_adv = AttackPGD(net, cfg.attack_param)

    print('==> Testing on Clean Data..')
    test(net, testloader, criterion)

    print('==> Testing on Adversarial Data..')
    test(net_adv, testloader, criterion, adv=True)
Example #7
    def load(path):
        with open(path, "rb") as f:
            model_data, act_params = cloudpickle.load(f)
        act = deepq.build_act(**act_params)
        sess = tf.Session()
        sess.__enter__()
        with tempfile.TemporaryDirectory() as td:
            arc_path = os.path.join(td, "packed.zip")
            with open(arc_path, "wb") as f:
                f.write(model_data)

            zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
            load_state(os.path.join(td, "model"))

        return ActWrapper(act, act_params)
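Here load_state restores TensorFlow variables rather than a PyTorch checkpoint. A sketch under that assumption, in the TF1 / tf.train.Saver style used by OpenAI Baselines:

import tensorflow as tf

def load_state(fname, sess=None):
    # Restore all global variables into the (default) session from a
    # tf.train.Saver checkpoint.
    sess = sess or tf.get_default_session()
    saver = tf.train.Saver()
    saver.restore(sess, fname)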
def update_raw_string_bucket(update_clicks, modal_validity, bucket_id,
                             bucket_name, search_results, result_clicks):
    modal_validity = load_state(modal_validity)
    validity = modal_validity.get('validity')

    data = load_state(search_results).get('data')
    button_clicked = load_state(result_clicks).get("button_clicked")

    if validity is None:
        return save_state({'updated': False})

    if validity == INVALID:
        return save_state({'updated': False})

    bucket_id = bucket_id if bucket_id is not None else ""
    bucket_name = bucket_name if bucket_name is not None else ""

    valid_bucket_id = modal_validity.get("bucket_id")
    valid_bucket_name = modal_validity.get("bucket_name")

    # It's okay if valid bucket name is none. Means it's a new bucket.
    if validity == NEW:
        if valid_bucket_id != bucket_id:
            return save_state({'updated': False})

    if validity == EXISTS:
        if valid_bucket_id != bucket_id or valid_bucket_name != bucket_name:
            return save_state({'updated': False})

    # Escape single quotes so the value is safe inside the SQL string literal.
    raw_string = data["raw string"][button_clicked].replace("'", "''")

    if sql.execute(
            f"SELECT count(*) FROM reference.organization_buckets_edits WHERE raw_string = '{raw_string}';"
    )['count'][0] > 0:
        sql.execute(
            f"UPDATE reference.organization_buckets_edits SET bucket = '{bucket_name}' WHERE raw_string = '{raw_string}';"
        )
        sql.execute(
            f"UPDATE reference.organization_buckets_edits SET bucket_id = '{bucket_id}' WHERE raw_string = '{raw_string}';"
        )

    else:
        sql.execute(
            f"INSERT INTO reference.organization_buckets_edits (raw_string, bucket, bucket_id, time) VALUES ('{raw_string}', '{bucket_name}', '{bucket_id}', GETDATE());"
        )

    print(f"Database updated: {raw_string} to {bucket_id} ({bucket_name})")
    return save_state({'updated': True})
Example #9
def view_calibration(id):
    """View control page for a camera
        Args:
            id (str): Identifier name of camera
        Returns:
            HTML page
    """
    # TODO:  if no  session['UPLOAD_TOKEN'], redirect to page that asks one.
    if not is_existing_id(id):
        return "", 404
    small_url = url_for("image_view", id=id, image="small")
    crop_url = url_for("image_view", id=id, image="crop")
    state = load_state(id)
    if request.method == "POST":
        mode = request.form["mode"]
        state["server"]["mode"] = mode
        state["server"]["exposure_modifier"] = round(
            get_float(request.form["exposure_modifier"], 1.0), 2)
        refresh_state(id, state["server"], "server")

    state_view = json.dumps(state, indent=4, sort_keys=True)
    return render_template(
        "view.html",
        id=id,
        small_url=small_url,
        crop_url=crop_url,
        state=state,
        state_view=state_view,
    )
def update_update_button_clicks(*args):

    clicks = args[:-1]
    previous_state = load_state(args[-1])

    button_clicked = -1

    # If previous state doesn't exist, then just look for a click:
    if not args[-1]:
        for i, click in enumerate(clicks):
            if click:
                button_clicked = i
                break

    # If previous state does exist, we need to find the click that increased.
    else:
        previous_clicks = previous_state.get("clicks")
        for i, n_click, last_n_click in zip(range(len(clicks)), clicks,
                                            previous_clicks):
            if n_click:
                if n_click > last_n_click:
                    button_clicked = i

    # Ensure we've got no "None" clicks to store:
    clicks = list(clicks)
    for i, click in enumerate(clicks):
        if not click:
            clicks[i] = 0

    # Save state:
    return save_state({"clicks": clicks, "button_clicked": button_clicked})
def update_state_search(n_clicks, raw_string_n_submits, bucket_name_n_submits,
                        bucket_id_n_submits, database_updated, previous_state,
                        raw_string, bucket_name, bucket_id):
    # print(f"Called Update State of Search Bar {raw_string}")
    previous_state = load_state(previous_state)

    # Determine Whether to Run Search:
    # We require one of n_clicks/n_submits to be not None
    # The callback is called once before anything is populated.
    # In this case, we just ignore it. Otherwise, we want to run search.

    run_search = bool(n_clicks or raw_string_n_submits or
                      bucket_name_n_submits or bucket_id_n_submits)

    # If we run the search, do it!
    if run_search:
        raw_string = None if raw_string == '' else raw_string
        bucket_name = None if bucket_name == '' else bucket_name
        bucket_id = None if bucket_id == '' else bucket_id

        if raw_string or bucket_name or bucket_id:
            data = sql.execute(f"""
                SELECT 
                    raw_string as "Raw String"
                    , coalesce(edits.bucket, original.bucket) as "Bucket Name"
                    , coalesce(edits.bucket_id, original.bucket_id) as "Bucket ID"
                    , original.bucket_id as "Original Bucket ID"
                    , original.has_new_bucket as "New Bucket"
                FROM staging.organization_buckets original
                LEFT JOIN reference.organization_buckets_edits edits USING(raw_string)
                WHERE original.raw_string <> '' AND original.bucket <> '' {
                    f" AND (original.raw_string ~* '{raw_string}' ) " if raw_string else ''
                }{
                    f" AND (original.bucket ~* '{bucket_name}' ) " if bucket_name else ''
                }
                {
                    f" AND (original.bucket_id ~* '{bucket_id}' ) " if bucket_id else ''
                }
                ORDER BY CASE WHEN original.has_new_bucket THEN 1 ELSE 0 END DESC, 3,2,1
                LIMIT {results.N_ROWS}
            """)
        else:
            data = None
    else:
        data = None

    # Write out the current search state.
    new_state = {
        "raw_string": raw_string,
        "bucket_name": bucket_name,
        "bucket_id": bucket_id,
        "run_search": run_search,
        "data": data
    }

    return (save_state(new_state),
            results.generate_results_table(new_state.get("data")))
Example #12
    def load_state(self, root, Iter, resume=False):
        path = os.path.join(root, "ckpt_iter_{}.pth.tar".format(Iter))
        netD_path = os.path.join(root, "D_iter_{}.pth.tar".format(Iter))

        if resume:
            utils.load_state(path, self.model, self.optim)
            utils.load_state(netD_path, self.netD, self.optimD)
        else:
            utils.load_state(path, self.model)
            utils.load_state(netD_path, self.netD)
def toggle_modal(result_clicks, close_n_clicks, database_update,
                 previous_is_open, button_clicks):
    button_clicks = load_state(button_clicks)
    button_clicked = button_clicks.get("button_clicked", -1)
    print(f"Toggling Modal to {not previous_is_open}")

    if close_n_clicks is not None or (button_clicked >= 0):
        return not previous_is_open
    return previous_is_open
Example #14
    def load_state(self, path, Iter, resume=False):
        model_path = os.path.join(path, "ckpt_iter_{}.pth.tar".format(Iter))
        discriminator_path = os.path.join(path,
                                          "D_iter_{}.pth.tar".format(Iter))

        if resume:
            utils.load_state(model_path, self.model, self.optim)
            utils.load_state(discriminator_path, self.netD, self.optimD)
        else:
            utils.load_state(model_path, self.model)
            utils.load_state(discriminator_path, self.netD)
Example #15
def ajax_get_state(id):
    """Get the current reported state from a camera as a formatted string
        Args:
            id (str): Identifier name of camera
        Returns:
            string
    """
    if not is_existing_id(id):
        return "", 404
    state_view = json.dumps(load_state(id), indent=4, sort_keys=True)
    return state_view, 200
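Both camera views above read load_state(id) for a per-camera state document. A hedged sketch, assuming each camera's state is stored as one JSON file (the path and layout are hypothetical):

import json
import os

STATE_DIR = 'state'  # hypothetical storage location

def load_state(camera_id):
    # Load the camera's most recently reported state as a dict.
    with open(os.path.join(STATE_DIR, '{}.json'.format(camera_id))) as f:
        return json.load(f)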
Example #16
    def train(self):
        self.agent.start_interaction(self.envs, nlump=self.hps['nlumps'], dynamics=self.dynamics)
        while True:
            info = self.agent.step()
            if info['update']:
                logger.logkvs(info['update'])
                logger.dumpkvs()
            if self.agent.rollout.stats['tcount'] == 0:
                fname = os.path.join(self.hps['save_dir'], 'checkpoints')
                if os.path.exists(fname+'.index'):
                    load_state(fname)
                    print('load successfully')
                else:
                    print('fail to load')
            if self.agent.rollout.stats['tcount']%int(self.num_timesteps/self.num_timesteps)==0:
                fname = os.path.join(self.hps['save_dir'], 'checkpoints')
                save_state(fname)
            if self.agent.rollout.stats['tcount'] > self.num_timesteps:
                break
            # print(self.agent.rollout.stats['tcount'])

        self.agent.stop_interaction()
Example #17
def test(**kwargs):
    opt.parse(kwargs)
    dataset = OCRDataset('data/images', 'data/labels', 'data/test.imglist',
                         opt.input_size, 'test', opt.chars_list, opt.max_seq)
    dataloader = DataLoader(dataset,
                            batch_size=opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_works)

    model = getattr(models, opt.model)(opt.basenet,
                                       opt.input_size,
                                       opt.max_seq,
                                       opt.num_classes,
                                       mode='test',
                                       attn=opt.attn)
    load_state(model, opt.load_model_path, "cuda:%d" % opt.gpus[0])
    model = gpu(model, opt)
    model.eval()
    t_score = 0.3
    match, all = 0, 0
    for inputs, text in dataloader:
        inputs = gpu(inputs, opt)
        with torch.no_grad():
            outputs = model(inputs)
        outputs = word_format(outputs, t_score)
        outputs = outputs[0].detach().cpu().numpy()
        outputs = outputs[np.where(np.max(outputs, 1) != 0)[0]]
        idx = np.argmax(outputs, 1)
        idx = idx[np.where(idx != 0)[0]]
        preds = ''.join([opt.chars_list[i] for i in idx])
        text = text[0]
        if text == preds:
            match += 1
        else:
            print('text/pred:%s,%s' % (text, preds))
        all += 1
        torch.cuda.empty_cache()
    print('match/all (%.2f): %d/%d' % (match / all, match, all))
def update_search_results(id_submits, name_submits, n_clicks, database_update,
                          bucket_id, bucket_name, previous_search):

    previous_search = load_state(previous_search)
    database_update = load_state(database_update)

    # n_triggers = id_submits if id_submits else 0
    #     + name_submits if name_submits else 0
    #     + n_clicks if n_clicks else 0

    # previous_triggers = previous_search.get("triggers", 0)

    # if n_triggers > previous_triggers or database_update.get("status"):
    search_results = results.search(bucket_id, bucket_name)

    state = save_state({
        'data': search_results,
        # 'n_triggers': n_triggers
    })

    layout = results.generate_result_table(search_results)

    return [state, layout]
def merge_bucket(n_clicks, new_bucket_id, merge_state):
    merge_state = load_state(merge_state)

    is_valid = merge.validate(new_bucket_id)
    status = False
    old_bucket_id = None
    if is_valid:
        old_bucket_id = merge_state.get("bucket_id")
        status = merge.merge(old_bucket_id, new_bucket_id)

    # print(status)
    return save_state({
        'status': status,
    })
Example #20
def train(train, model, criterion, optimizer, n_lettres, n_epochs, log_dir,
          checkpoint_path):
    losses = []
    writer = SummaryWriter(log_dir=log_dir)

    pbar = tqdm(range(n_epochs), total=n_epochs, file=sys.stdout)

    state = load_state(checkpoint_path, model, optimizer)

    for i in pbar:
        l = []
        for x, y in train:

            x = x.squeeze(-1).permute(1, 0, -1).to(device)
            seq_len, batch_size, embeding = x.shape

            y = y.view(seq_len * batch_size).to(device)

            o = state.model(x, state.model.initHidden(batch_size).to(device))
            d = state.model.decode(o).view(seq_len * batch_size, embeding)

            loss = criterion(d, y)
            loss.backward()

            state.optimizer.step()
            state.optimizer.zero_grad()

            l.append(loss.item())

            state.iteration += 1

        state.epoch += 1
        save_state(checkpoint_path, state)

        lo = np.mean(l)
        losses.append(lo)
        # \tTest: Loss: {np.round(test_lo, 4)}
        pbar.set_description(f'Train: Loss: {np.round(lo, 4)}')

        writer.add_scalar('Loss/train', lo, i)

    return losses
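The training loop above expects load_state/save_state to wrap the model, optimizer, epoch and iteration counters into a single resumable object. A minimal sketch of that pattern (the State class is an assumption based on the attributes used above):

import os
import torch

class State:
    def __init__(self, model, optimizer):
        self.model = model
        self.optimizer = optimizer
        self.epoch = 0
        self.iteration = 0

def save_state(path, state):
    # Persist the whole training state so a later run can resume from it.
    torch.save(state, path)

def load_state(path, model, optimizer):
    # Resume from an existing checkpoint, otherwise start fresh.
    if os.path.exists(path):
        return torch.load(path)
    return State(model, optimizer)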
def respond_to_validity(modal_validity):
    modal_validity = load_state(modal_validity)

    validity = modal_validity.get("validity", INVALID)

    disabled = False
    update_button_color = "btn-danger"
    new_bucket_name = modal_validity.get("bucket_name")
    new_bucket_name_disabled = True
    new_bucket_warning = ""

    if validity == INVALID:
        disabled = True
        update_button_color = "btn-danger"
    elif validity == NEW:
        update_button_color = "btn-warning"
        new_bucket_name_disabled = False
        new_bucket_warning = "Please choose a name for your new bucket."
    elif validity == EXISTS:
        update_button_color = "btn-success"

    return (disabled, update_button_color,
            new_bucket_name if new_bucket_name is not None else "",
            new_bucket_name_disabled, new_bucket_warning)
Example #22
    def __init__(
            self,
            depth=50,
            pretrained=True,
            # pretrained2d=True,
            num_stages=4,
            spatial_strides=(1, 2, 2, 2),
            temporal_strides=(1, 1, 1, 1),
            dilations=(1, 1, 1, 1),
            out_indices=[3],
            conv1_kernel_t=5,
            conv1_stride_t=2,
            pool1_kernel_t=1,
            pool1_stride_t=2,
            style='pytorch',
            frozen_stages=-1,
            inflate_freq=((1, 1, 1), (1, 0, 1, 0), (1, 0, 1, 0, 1, 0),
                          (0, 1, 0)),  # For C2D baseline, this is set to -1.
            inflate_stride=(1, 1, 1, 1),
            inflate_style='3x1x1',
            nonlocal_stages=(-1, ),
            nonlocal_freq=(0, 0, 0, 0),
            # nonlocal_freq=(0, 1, 1, 0), # Default setting
            nonlocal_cfg=None,
            bn_eval=False,
            bn_frozen=False,
            partial_bn=False,
            with_cp=False,
            num_classes=11):
        super(Res_I3D, self).__init__()
        if depth not in self.arch_settings:
            raise KeyError('invalid depth {} for resnet'.format(depth))
        self.depth = depth
        self.pretrained = pretrained
        # self.pretrained2d = pretrained2d
        self.num_stages = num_stages
        assert num_stages >= 1 and num_stages <= 4
        self.spatial_strides = spatial_strides
        self.temporal_strides = temporal_strides
        self.dilations = dilations
        assert len(spatial_strides) == len(temporal_strides) == len(
            dilations) == num_stages
        self.out_indices = out_indices
        assert max(out_indices) < num_stages
        self.style = style
        self.frozen_stages = frozen_stages
        self.inflate_freqs = inflate_freq if not isinstance(
            inflate_freq, int) else (inflate_freq, ) * num_stages
        self.inflate_style = inflate_style
        self.nonlocal_stages = nonlocal_stages
        self.nonlocal_freqs = nonlocal_freq if not isinstance(
            nonlocal_freq, int) else (nonlocal_freq, ) * num_stages
        self.nonlocal_cfg = nonlocal_cfg
        self.bn_eval = bn_eval
        self.bn_frozen = bn_frozen
        self.partial_bn = partial_bn
        self.with_cp = with_cp

        self.block, stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        self.inplanes = 64

        self.conv1 = nn.Conv3d(3,
                               64,
                               kernel_size=(conv1_kernel_t, 7, 7),
                               stride=(conv1_stride_t, 2, 2),
                               padding=((conv1_kernel_t - 1) // 2, 3, 3),
                               bias=False)
        self.bn1 = nn.BatchNorm3d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool3d(kernel_size=(pool1_kernel_t, 3, 3),
                                    stride=(pool1_stride_t, 2, 2),
                                    padding=(pool1_kernel_t // 2, 1, 1))
        #TODO: Check whether pad=0 differs a lot
        self.pool2 = nn.MaxPool3d(kernel_size=(2, 1, 1),
                                  stride=(2, 1, 1),
                                  padding=(0, 0, 0))

        self.avgpool = SimpleSpatialTemporalModule(spatial_type='avg',
                                                   temporal_size=2,
                                                   spatial_size=7)
        self.cls_head = ClsHead(with_avg_pool=False,
                                temporal_feature_size=1,
                                spatial_feature_size=1,
                                dropout_ratio=0.5,
                                in_channels=2048,
                                num_classes=num_classes)

        self.res_layers = []
        for i, num_blocks in enumerate(self.stage_blocks):
            spatial_stride = spatial_strides[i]
            temporal_stride = temporal_strides[i]
            dilation = dilations[i]
            planes = 64 * 2**i
            res_layer = make_res_layer(self.block,
                                       self.inplanes,
                                       planes,
                                       num_blocks,
                                       spatial_stride=spatial_stride,
                                       temporal_stride=temporal_stride,
                                       dilation=dilation,
                                       style=self.style,
                                       inflate_freq=self.inflate_freqs[i],
                                       inflate_style=self.inflate_style,
                                       nonlocal_freq=self.nonlocal_freqs[i],
                                       nonlocal_cfg=self.nonlocal_cfg
                                       if i in self.nonlocal_stages else None,
                                       with_cp=with_cp)
            self.inplanes = planes * self.block.expansion
            layer_name = 'layer{}'.format(i + 1)
            self.add_module(layer_name, res_layer)
            self.res_layers.append(layer_name)

        self.feat_dim = self.block.expansion * 64 * 2**(
            len(self.stage_blocks) - 1)

        #############
        # Initialization

        initializer.xavier(net=self)

        if pretrained:
            pretrained_model = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                'pretrained/i3d_kinetics_rgb_r50_c3d.pth')
            logging.info(
                "Network:: graph initialized, loading pretrained model: `{}'".
                format(pretrained_model))
            assert os.path.exists(
                pretrained_model), "cannot locate: `{}'".format(
                    pretrained_model)
            pretrained = torch.load(pretrained_model)
            load_state(self, pretrained['state_dict'])
        else:
            logging.info(
                "Network:: graph initialized, use random inilization!")
Example #23
def main():
    global args, best_prec1
    args = parser.parse_args()
    with open(args.config) as f:
        config = yaml.load(f)

    for key in config:
        for k, v in config[key].items():
            setattr(args, k, v)

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    #print("=> creating model '{}'".format(args.model))
    #if 'se_resnext50_32x4d_v1_sn' in args.model:
    #    model = models.__dict__[args.model](using_moving_average = args.using_moving_average, last_gamma=args.last_gamma)
    #else:
    #    model = models.__dict__[args.model](using_moving_average=args.using_moving_average)
    #model = resnet18()
    model = ResNet18()
    #model = SENet18()

    if not args.distributed:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(), args.base_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # auto resume from a checkpoint
    model_dir = args.model_dir
    start_epoch = 0
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if args.evaluate:
        utils.load_state_ckpt(args.checkpoint_path, model)
    else:
        best_prec1, start_epoch = utils.load_state(model_dir, model, optimizer=optimizer)
    writer = SummaryWriter(model_dir)

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    val_loader = torch.utils.data.DataLoader(
      datasets.ImageFolder(valdir, transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
      ])),
      batch_size=args.batch_size, shuffle=False,
      num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, 0, writer)
        return

    train_dataset_multi_scale = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            ColorAugmentation(),
            normalize,
        ]))

    train_dataset = datasets.ImageFolder(
      traindir,
      transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        ColorAugmentation(),
        normalize,
      ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader_multi_scale = torch.utils.data.DataLoader(
        train_dataset_multi_scale, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    train_loader = torch.utils.data.DataLoader(
      train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
      num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    if not args.using_moving_average:
        train_dataset_snhelper = datasets.ImageFolder(
          traindir,
          transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
          ]))
        train_loader_snhelper = torch.utils.data.DataLoader(
          train_dataset_snhelper, batch_size=args.batch_size * torch.cuda.device_count(), shuffle=(train_sampler is None),
          #train_dataset_snhelper, batch_size=1, shuffle=(train_sampler is None),
          num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    niters = len(train_loader)

    lr_scheduler = LRScheduler(optimizer, niters, args)

    for epoch in range(start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        if epoch < args.epochs - 5:
            train(train_loader_multi_scale, model, criterion, optimizer, lr_scheduler, epoch, writer)
        else:
            train(train_loader, model, criterion, optimizer, lr_scheduler, epoch, writer)

        if not args.using_moving_average:
            sn_helper(train_loader_snhelper, model)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch, writer)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        utils.save_checkpoint(model_dir, {
          'epoch': epoch + 1,
          'model': args.model,
          'state_dict': model.state_dict(),
          'best_prec1': best_prec1,
          'optimizer': optimizer.state_dict(),
        }, is_best)
Example #24
def main():
    global args, best_prec1
    args = parser.parse_args()

    with open(args.config) as f:
        config = yaml.load(f)

    for k, v in config['common'].items():
        setattr(args, k, v)
    torch.cuda.manual_seed(int(time.time()) % 1000)
    # create model
    print("=> creating model '{}'".format(args.arch))
    if args.arch.startswith('inception_v3'):
        print('inception_v3 without aux_logits!')
        image_size = 341
        input_size = 299
        model = models.__dict__[args.arch](aux_logits=True,
                                           num_classes=1000,
                                           pretrained=args.pretrained)
    else:
        image_size = 182
        input_size = 160
        student_model = models.__dict__[args.arch](
            num_classes=args.num_classes,
            pretrained=args.pretrained,
            avgpool_size=input_size / 32)
    student_model.cuda()
    student_params = list(student_model.parameters())

    student_optimizer = torch.optim.Adam(student_model.parameters(),
                                         args.base_lr * 0.1)

    args.save_path = "checkpoint/" + args.exp_name

    if not osp.exists(args.save_path):
        os.mkdir(args.save_path)

    tb_logger = SummaryWriter(args.save_path)
    logger = create_logger('global_logger', args.save_path + '/log.txt')

    for key, val in vars(args).items():
        logger.info("{:16} {}".format(key, val))

    criterion = nn.CrossEntropyLoss()
    print("Build network")
    last_iter = -1
    best_prec1 = 0
    load_state(args.save_path + "/ckptmodel_best.pth.tar", student_model)

    cudnn.benchmark = True

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    se_normalize = se_transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                           std=[0.229, 0.224, 0.225])

    border_value = int(np.mean([0.485, 0.456, 0.406]) * 255 + 0.5)
    test_aug = se_transforms.ImageAugmentation(True,
                                               0,
                                               rot_std=0.0,
                                               scale_u_range=[0.75, 1.333],
                                               affine_std=0,
                                               scale_x_range=None,
                                               scale_y_range=None)

    val_dataset = NormalDataset(args.val_root,
                                "./data/visda/list/validation_list.txt",
                                transform=transforms.Compose([
                                    se_transforms.ScaleAndCrop(
                                        (input_size, input_size), args.padding,
                                        False, np.array([0.485, 0.456, 0.406]),
                                        np.array([0.229, 0.224, 0.225]))
                                ]),
                                is_train=False,
                                args=args)

    val_loader = DataLoader(val_dataset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=args.workers)

    val_multi_dataset = NormalDataset(
        args.val_root,
        "./data/visda/list/validation_list.txt",
        transform=transforms.Compose([
            se_transforms.ScaleCropAndAugmentAffineMultiple(
                16, (input_size, input_size), args.padding, True, test_aug,
                border_value, np.array([0.485, 0.456, 0.406]),
                np.array([0.229, 0.224, 0.225]))
        ]),
        is_train=False,
        args=args)

    val_multi_loader = DataLoader(val_multi_dataset,
                                  batch_size=1,
                                  shuffle=False,
                                  num_workers=args.workers)

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        student_optimizer, args.lr_steps, args.lr_gamma)
    #logger.info('{}'.format(args))
    validate(val_loader, student_model, criterion)
    validate_multi(val_multi_loader, student_model, criterion)
    # dataset
    dataset = load_cifar10(args.dataset_root, 'test')
    #dataset = None


    # model
    ranks = []
    if args.is_me:
        ranks = [args.d_c, args.d_s1, args.d_s2]
    else:
        ranks = [args.d_c, args.d_s1]

    args.ranks = ranks

    model = WoodburyGlow(args)
    if args.cuda:
        model = model.cuda()
    assert args.model_path != "", "need to load a model"
    state = load_state(args.model_path, args.cuda)
    model.load_state_dict(state["model"])
    del state

    print("number of parameters: {}".format(count_parameters(model)))



    # begin to test
    inferencer = Inferencer(model, dataset, args)
    #inferencer.Inference()
    inferencer.Sample(args.n_samples, args.sample_each_row)
Example #26
def main():
    global args, config, X

    args = parser.parse_args()
    print(args)

    with open(args.config) as f:
        config = EasyDict(yaml.load(f))

    config.save_path = os.path.dirname(args.config)

    ####### regular set up
    assert torch.cuda.is_available()
    device = torch.device("cuda")
    config.device = device

    # random seed setup
    print("Random Seed: ", config.seed)
    random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed(config.seed)
    cudnn.benchmark = True

    ####### regular set up end


    netG = torch.nn.DataParallel(NetG(ngf=config.ngf))
    netD = torch.nn.DataParallel(NetD(ndf=config.ndf))

    netF = torch.nn.DataParallel(NetF())
    netI = torch.nn.DataParallel(NetI()).eval()
    for param in netF.parameters():
        param.requires_grad = False

    criterion_MSE = nn.MSELoss()

    fixed_sketch = torch.tensor(0, device=device).float()
    fixed_hint = torch.tensor(0, device=device).float()
    fixed_sketch_feat = torch.tensor(0, device=device).float()

    ####################
    netD = netD.to(device)
    netG = netG.to(device)
    netF = netF.to(device)
    netI = netI.to(device)
    criterion_MSE = criterion_MSE.to(device)

    # setup optimizer

    optimizerG = optim.Adam(netG.parameters(), lr=config.lr_scheduler.base_lr, betas=(0.5, 0.9))
    optimizerD = optim.Adam(netD.parameters(), lr=config.lr_scheduler.base_lr, betas=(0.5, 0.9))

    last_iter = -1
    best_fid = 1e6

    if args.resume:
        best_fid, last_iter = load_state(args.resume, netG, netD, optimizerG, optimizerD)

    config.lr_scheduler['last_iter'] = last_iter

    config.lr_scheduler['optimizer'] = optimizerG
    lr_schedulerG = get_scheduler(config.lr_scheduler)
    config.lr_scheduler['optimizer'] = optimizerD
    lr_schedulerD = get_scheduler(config.lr_scheduler)

    tb_logger = SummaryWriter(config.save_path + '/events')
    logger = create_logger('global_logger', config.save_path + '/log.txt')
    logger.info(f'args: {pprint.pformat(args)}')
    logger.info(f'config: {pprint.pformat(config)}')

    batch_time = AverageMeter(config.print_freq)
    data_time = AverageMeter(config.print_freq)
    flag = 1
    mu, sigma = 1, 0.005
    X = stats.truncnorm((0 - mu) / sigma, (1 - mu) / sigma, loc=mu, scale=sigma)
    i = 0
    curr_iter = last_iter + 1

    dataloader = train_loader(config)
    data_iter = iter(dataloader)

    end = time.time()
    while i < len(dataloader):
        lr_schedulerG.step(curr_iter)
        lr_schedulerD.step(curr_iter)
        current_lr = lr_schedulerG.get_lr()[0]
        ############################
        # (1) Update D network
        ###########################
        for p in netD.parameters():  # reset requires_grad
            p.requires_grad = True  # they are set to False below in netG update
        for p in netG.parameters():
            p.requires_grad = False  # to avoid computation ft_params

        # train the discriminator Diters times
        j = 0
        while j < config.diters:
            netD.zero_grad()

            i += 1
            j += 1

            data_end = time.time()
            real_cim, real_vim, real_sim = next(data_iter)
            data_time.update(time.time() - data_end)

            real_cim, real_vim, real_sim = real_cim.to(device), real_vim.to(device), real_sim.to(device)
            mask = mask_gen()
            hint = torch.cat((real_vim * mask, mask), 1)

            # train with fake
            with torch.no_grad():
                feat_sim = netI(real_sim).detach()
                fake_cim = netG(real_sim, hint, feat_sim).detach()

            errD_fake = netD(fake_cim, feat_sim)
            errD_fake = errD_fake.mean(0).view(1)

            errD_fake.backward(retain_graph=True)  # backward on score on real

            errD_real = netD(real_cim, feat_sim)
            errD_real = errD_real.mean(0).view(1)
            errD = errD_real - errD_fake

            errD_realer = -1 * errD_real + errD_real.pow(2) * config.drift

            errD_realer.backward(retain_graph=True)  # backward on score on real

            gradient_penalty = calc_gradient_penalty(netD, real_cim, fake_cim, feat_sim)
            gradient_penalty.backward()

            optimizerD.step()

        ############################
        # (2) Update G network
        ############################

        for p in netD.parameters():
            p.requires_grad = False  # to avoid computation
        for p in netG.parameters():
            p.requires_grad = True
        netG.zero_grad()

        data = next(data_iter)
        real_cim, real_vim, real_sim = data
        i += 1

        real_cim, real_vim, real_sim = real_cim.to(device), real_vim.to(device), real_sim.to(device)

        if flag:  # fix samples
            mask = mask_gen()
            hint = torch.cat((real_vim * mask, mask), 1)
            with torch.no_grad():
                feat_sim = netI(real_sim).detach()

            tb_logger.add_image('target imgs', vutils.make_grid(real_cim.mul(0.5).add(0.5), nrow=4))
            tb_logger.add_image('sketch imgs', vutils.make_grid(real_sim.mul(0.5).add(0.5), nrow=4))
            tb_logger.add_image('hint', vutils.make_grid((real_vim * mask).mul(0.5).add(0.5), nrow=4))

            fixed_sketch.resize_as_(real_sim).copy_(real_sim)
            fixed_hint.resize_as_(hint).copy_(hint)
            fixed_sketch_feat.resize_as_(feat_sim).copy_(feat_sim)

            flag -= 1

        mask = mask_gen()
        hint = torch.cat((real_vim * mask, mask), 1)

        with torch.no_grad():
            feat_sim = netI(real_sim).detach()

        fake = netG(real_sim, hint, feat_sim)

        errd = netD(fake, feat_sim)
        errG = errd.mean() * config.advW * -1
        errG.backward(retain_graph=True)
        feat1 = netF(fake)
        with torch.no_grad():
            feat2 = netF(real_cim)

        contentLoss = criterion_MSE(feat1, feat2)
        contentLoss.backward()

        optimizerG.step()
        batch_time.update(time.time() - end)

        ############################
        # (3) Report & 100 Batch checkpoint
        ############################
        curr_iter += 1

        if curr_iter % config.print_freq == 0:
            tb_logger.add_scalar('VGG MSE Loss', contentLoss.item(), curr_iter)
            tb_logger.add_scalar('wasserstein distance', errD.item(), curr_iter)
            tb_logger.add_scalar('errD_real', errD_real.item(), curr_iter)
            tb_logger.add_scalar('errD_fake', errD_fake.item(), curr_iter)
            tb_logger.add_scalar('Gnet loss toward real', errG.item(), curr_iter)
            tb_logger.add_scalar('gradient_penalty', gradient_penalty.item(), curr_iter)
            tb_logger.add_scalar('lr', current_lr, curr_iter)
            logger.info(f'Iter: [{curr_iter}/{len(dataloader)//(config.diters+1)}]\t'
                        f'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        f'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                        f'errG {errG.item():.4f}\t'
                        f'errD {errD.item():.4f}\t'
                        f'err_D_real {errD_real.item():.4f}\t'
                        f'err_D_fake {errD_fake.item():.4f}\t'
                        f'content loss {contentLoss.item():.4f}\t'
                        f'LR {current_lr:.4f}'
                        )

        if curr_iter % config.print_img_freq == 0:
            with torch.no_grad():
                fake = netG(fixed_sketch, fixed_hint, fixed_sketch_feat)
                tb_logger.add_image('colored imgs',
                                    vutils.make_grid(fake.detach().mul(0.5).add(0.5), nrow=4),
                                    curr_iter)

        if curr_iter % config.val_freq == 0:
            fid, var = validate(netG, netI)
            tb_logger.add_scalar('fid_val', fid, curr_iter)
            tb_logger.add_scalar('fid_variance', var, curr_iter)
            logger.info(f'fid: {fid:.3f} ({var})\t')

            # remember best fid and save checkpoint
            is_best = fid < best_fid
            best_fid = min(fid, best_fid)
            save_checkpoint({
                'step': curr_iter - 1,
                'state_dictG': netG.state_dict(),
                'state_dictD': netD.state_dict(),
                'best_fid': best_fid,
                'optimizerG': optimizerG.state_dict(),
                'optimizerD': optimizerD.state_dict(),
            }, is_best, config.save_path + '/ckpt')

        end = time.time()
Example #27
import matplotlib.pyplot as plt
from utils import load_state, load_train_logger

if __name__ == '__main__':
    state = load_state('checkpoints/stn7/epoch_20.pth')
    train_logger = load_train_logger(state)
    losses = train_logger.epoch_losses()
    epoches = range(1, train_logger.n_epoches() + 1)
    plt.plot(epoches, losses)
    plt.xticks(epoches)
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.show()
Example #28
            matches.update(match)
            discovered_matchIds[matchId] = True

            # Sleep to stay under the API data rate limit
            time.sleep(TIME_SLEEP)

        if loop_count % CHECKPOINT_INTERVAL == 0:
            # Save data every CHECKPOINT_INTERVAL number of summonerIds
            checkpoint_num += 1
            utils.save_state(checkpoint_num, matches, discovered_summonerIds, discovered_matchIds, g, max_hop, bfs_queue, hop)
            return ""


if __name__ == '__main__':
    rg = riot_games_api.RiotGames('lol/riot_games_api.key')

    # Read parameters from terminal
    summonerId = int(sys.argv[1])
    checkpoint_num = int(sys.argv[2])

    print "Starting at summonerId %d and checkpoint num %d ..." % (summonerId, checkpoint_num)

    # Initialize or load checkpoint data
    if checkpoint_num == -1:
        matches, discovered_summonerIds, discovered_matchIds, g, max_hop, bfs_queue, hop = initialize(summonerId)
    else:
        matches, discovered_summonerIds, discovered_matchIds, g, max_hop, bfs_queue, hop = utils.load_state(checkpoint_num)

    # They call it a mine! A MINE!!
    mine(checkpoint_num, matches, discovered_summonerIds, discovered_matchIds, g, max_hop, bfs_queue, hop)
Example #29
def main():
    global args, config, best_prec1
    args = parser.parse_args()

    with open(args.config) as f:
        config = yaml.load(f)

    config = EasyDict(config['common'])
    config.save_path = os.path.dirname(args.config)

    rank, world_size = dist_init()

    # create model
    bn_group_size = config.model.kwargs.bn_group_size
    bn_var_mode = config.model.kwargs.get('bn_var_mode', 'L2')
    if bn_group_size == 1:
        bn_group = None
    else:
        assert world_size % bn_group_size == 0
        bn_group = simple_group_split(world_size, rank,
                                      world_size // bn_group_size)

    config.model.kwargs.bn_group = bn_group
    config.model.kwargs.bn_var_mode = (link.syncbnVarMode_t.L1 if bn_var_mode
                                       == 'L1' else link.syncbnVarMode_t.L2)
    model = model_entry(config.model)
    if rank == 0:
        print(model)

    model.cuda()

    if config.optimizer.type == 'FP16SGD' or config.optimizer.type == 'FusedFP16SGD':
        args.fp16 = True
    else:
        args.fp16 = False

    if args.fp16:
        # if you have modules that must use fp32 parameters, and need fp32 input
        # try use link.fp16.register_float_module(your_module)
        # if you only need fp32 parameters set cast_args=False when call this
        # function, then call link.fp16.init() before call model.half()
        if config.optimizer.get('fp16_normal_bn', False):
            print('using normal bn for fp16')
            link.fp16.register_float_module(link.nn.SyncBatchNorm2d,
                                            cast_args=False)
            link.fp16.register_float_module(torch.nn.BatchNorm2d,
                                            cast_args=False)
            link.fp16.init()
        model.half()

    model = DistModule(model, args.sync)

    # create optimizer
    opt_config = config.optimizer
    opt_config.kwargs.lr = config.lr_scheduler.base_lr
    if config.get('no_wd', False):
        param_group, type2num = param_group_no_wd(model)
        opt_config.kwargs.params = param_group
    else:
        opt_config.kwargs.params = model.parameters()

    optimizer = optim_entry(opt_config)

    # optionally resume from a checkpoint
    last_iter = -1
    best_prec1 = 0
    if args.load_path:
        if args.recover:
            best_prec1, last_iter = load_state(args.load_path,
                                               model,
                                               optimizer=optimizer)
        else:
            load_state(args.load_path, model)

    cudnn.benchmark = True

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # augmentation
    aug = [
        transforms.RandomResizedCrop(config.augmentation.input_size),
        transforms.RandomHorizontalFlip()
    ]

    for k in config.augmentation.keys():
        assert k in [
            'input_size', 'test_resize', 'rotation', 'colorjitter', 'colorold'
        ]
    rotation = config.augmentation.get('rotation', 0)
    colorjitter = config.augmentation.get('colorjitter', None)
    colorold = config.augmentation.get('colorold', False)

    if rotation > 0:
        aug.append(transforms.RandomRotation(rotation))

    if colorjitter is not None:
        aug.append(transforms.ColorJitter(*colorjitter))

    aug.append(transforms.ToTensor())

    if colorold:
        aug.append(ColorAugmentation())

    aug.append(normalize)

    # train
    train_dataset = McDataset(config.train_root,
                              config.train_source,
                              transforms.Compose(aug),
                              fake=args.fake)

    # val
    val_dataset = McDataset(
        config.val_root, config.val_source,
        transforms.Compose([
            transforms.Resize(config.augmentation.test_resize),
            transforms.CenterCrop(config.augmentation.input_size),
            transforms.ToTensor(),
            normalize,
        ]), args.fake)

    train_sampler = DistributedGivenIterationSampler(
        train_dataset,
        config.lr_scheduler.max_iter,
        config.batch_size,
        last_iter=last_iter)
    val_sampler = DistributedSampler(val_dataset, round_up=False)

    train_loader = DataLoader(train_dataset,
                              batch_size=config.batch_size,
                              shuffle=False,
                              num_workers=config.workers,
                              pin_memory=True,
                              sampler=train_sampler)

    val_loader = DataLoader(val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            num_workers=config.workers,
                            pin_memory=True,
                            sampler=val_sampler)

    config.lr_scheduler['optimizer'] = optimizer.optimizer if isinstance(
        optimizer, FP16SGD) else optimizer
    config.lr_scheduler['last_iter'] = last_iter
    lr_scheduler = get_scheduler(config.lr_scheduler)

    if rank == 0:
        tb_logger = SummaryWriter(config.save_path + '/events')
        logger = create_logger('global_logger', config.save_path + '/log.txt')
        logger.info('args: {}'.format(pprint.pformat(args)))
        logger.info('config: {}'.format(pprint.pformat(config)))
    else:
        tb_logger = None

    if args.evaluate:
        if args.fusion_list is not None:
            validate(val_loader,
                     model,
                     fusion_list=args.fusion_list,
                     fuse_prob=args.fuse_prob)
        else:
            validate(val_loader, model)
        link.finalize()
        return

    train(train_loader, val_loader, model, optimizer, lr_scheduler,
          last_iter + 1, tb_logger)

    link.finalize()
Example #30
def validate(val_loader, model, fusion_list=None, fuse_prob=False):
    batch_time = AverageMeter(0)
    losses = AverageMeter(0)
    top1 = AverageMeter(0)
    top5 = AverageMeter(0)

    # switch to evaluate mode
    if fusion_list is not None:
        model_list = []
        for i in range(len(fusion_list)):
            model_list.append(model_entry(config.model))
            model_list[i].cuda()
            model_list[i] = DistModule(model_list[i], args.sync)
            load_state(fusion_list[i], model_list[i])
            model_list[i].eval()
        if fuse_prob:
            softmax = nn.Softmax(dim=1)
    else:
        model.eval()

    rank = link.get_rank()
    world_size = link.get_world_size()

    logger = logging.getLogger('global_logger')

    criterion = nn.CrossEntropyLoss()

    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            input = input.cuda() if not args.fp16 else input.half().cuda()
            target = target.cuda()
            # compute output
            if fusion_list is not None:
                output_list = []
                for model_idx in range(len(fusion_list)):
                    output = model_list[model_idx](input)
                    if fuse_prob:
                        output = softmax(output)
                    output_list.append(output)
                output = torch.stack(output_list, 0)
                output = torch.mean(output, 0)
            else:
                output = model(input)

            # measure accuracy and record loss
            loss = criterion(
                output, target
            )  #/ world_size ## loss should not be scaled here, it's reduced later!
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))

            num = input.size(0)
            losses.update(loss.item(), num)
            top1.update(prec1.item(), num)
            top5.update(prec5.item(), num)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % config.print_freq == 0 and rank == 0:
                logger.info(
                    'Test: [{0}/{1}]\tTime {batch_time.val:.3f} ({batch_time.avg:.3f})'
                    .format(i, len(val_loader), batch_time=batch_time))

    # gather final results
    total_num = torch.Tensor([losses.count])
    loss_sum = torch.Tensor([losses.avg * losses.count])
    top1_sum = torch.Tensor([top1.avg * top1.count])
    top5_sum = torch.Tensor([top5.avg * top5.count])
    link.allreduce(total_num)
    link.allreduce(loss_sum)
    link.allreduce(top1_sum)
    link.allreduce(top5_sum)
    final_loss = loss_sum.item() / total_num.item()
    final_top1 = top1_sum.item() / total_num.item()
    final_top5 = top5_sum.item() / total_num.item()

    if rank == 0:
        logger.info(
            ' * Prec@1 {:.3f}\tPrec@5 {:.3f}\tLoss {:.3f}\ttotal_num={}'.
            format(final_top1, final_top5, final_loss, total_num.item()))

    model.train()

    return final_loss, final_top1, final_top5
def main():

    ## config
    global args
    args = parser.parse_args()

    with open(args.config) as f:
        config = yaml.load(f)

    for k,v in config.items():    
        if isinstance(v, dict):
            argobj = ArgObj()
            setattr(args, k, argobj)
            for kk,vv in v.items():
                setattr(argobj, kk, vv)
        else:
            setattr(args, k, v)
    args.ngpu = len(args.gpus.split(','))

    ## asserts
    assert args.model.backbone in model_names, "available backbone names: {}".format(model_names)
    num_tasks = len(args.train.data_root)
    assert(num_tasks == len(args.train.loss_weight))
    assert(num_tasks == len(args.train.batch_size))
    assert(num_tasks == len(args.train.data_list))
    #assert(num_tasks == len(args.train.data_meta))
    if args.val.flag:
        assert(num_tasks == len(args.val.batch_size))
        assert(num_tasks == len(args.val.data_root))
        assert(num_tasks == len(args.val.data_list))
        #assert(num_tasks == len(args.val.data_meta))

    ## mkdir
    if not hasattr(args, 'save_path'):
        args.save_path = os.path.dirname(args.config)
    if not os.path.isdir('{}/checkpoints'.format(args.save_path)):
        os.makedirs('{}/checkpoints'.format(args.save_path))
    if not os.path.isdir('{}/logs'.format(args.save_path)):
        os.makedirs('{}/logs'.format(args.save_path))
    if not os.path.isdir('{}/events'.format(args.save_path)):
        os.makedirs('{}/events'.format(args.save_path))

    ## create dataset
    if not (args.extract or args.evaluate): # train + val
        for i in range(num_tasks):
            args.train.batch_size[i] *= args.ngpu

        #train_dataset = [FaceDataset(args, idx, 'train') for idx in range(num_tasks)]
        train_dataset = [FileListLabeledDataset(
            args.train.data_list[i], args.train.data_root[i],
            transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.Resize(args.model.input_size),
                transforms.ToTensor(),
                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),]),
            memcached=args.memcached,
            memcached_client=args.memcached_client) for i in range(num_tasks)]
        args.num_classes = [td.num_class for td in train_dataset]
        train_longest_size = max([int(np.ceil(len(td) / float(bs))) for td, bs in zip(train_dataset, args.train.batch_size)])
        train_sampler = [GivenSizeSampler(td, total_size=train_longest_size * bs, rand_seed=args.train.rand_seed) for td, bs in zip(train_dataset, args.train.batch_size)]
        train_loader = [DataLoader(
            train_dataset[k], batch_size=args.train.batch_size[k], shuffle=False,
            num_workers=args.workers, pin_memory=False, sampler=train_sampler[k]) for k in range(num_tasks)]
        assert all(len(train_loader[k]) == len(train_loader[0]) for k in range(num_tasks))

        if args.val.flag:
            for i in range(num_tasks):
                args.val.batch_size[i] *= args.ngpu
    
            #val_dataset = [FaceDataset(args, idx, 'val') for idx in range(num_tasks)]
            val_dataset = [FileListLabeledDataset(
                args.val.data_list[i], args.val.data_root[i],
                transforms.Compose([
                    transforms.Resize(args.model.input_size),
                    transforms.ToTensor(),
                    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),]),
                memcached=args.memcached,
                memcached_client=args.memcached_client) for i in range(num_tasks)]
            
            val_longest_size = max([int(np.ceil(len(vd) / float(bs))) for vd, bs in zip(val_dataset, args.val.batch_size)])
            val_sampler = [GivenSizeSampler(vd, total_size=val_longest_size * bs, sequential=True) for vd, bs in zip(val_dataset, args.val.batch_size)]
            val_loader = [DataLoader(
                val_dataset[k], batch_size=args.val.batch_size[k], shuffle=False,
                num_workers=args.workers, pin_memory=False, sampler=val_sampler[k]) for k in range(num_tasks)]
            assert all(len(val_loader[k]) == len(val_loader[0]) for k in range(num_tasks))

    if args.test.flag or args.evaluate: # online or offline evaluate
        args.test.batch_size *= args.ngpu
        test_dataset = []
        for tb in args.test.benchmark:
            if tb == 'megaface':
                test_dataset.append(FileListDataset(args.test.megaface_list,
                    args.test.megaface_root, transforms.Compose([
                    transforms.Resize(args.model.input_size),
                    transforms.ToTensor(),
                    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),])))
            else:
                test_dataset.append(BinDataset("{}/{}.bin".format(args.test.test_root, tb),
                    transforms.Compose([
                    transforms.Resize(args.model.input_size),
                    transforms.ToTensor(),
                    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
                    ])))
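        # pad each benchmark to a whole number of batches and keep samples in file order
        # (sequential sampler) so extracted features line up with the input lists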
        test_sampler = [GivenSizeSampler(td,
            total_size=int(np.ceil(len(td) / float(args.test.batch_size)) * args.test.batch_size),
            sequential=True, silent=True) for td in test_dataset]
        test_loader = [DataLoader(
            td, batch_size=args.test.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=False, sampler=ts)
            for td, ts in zip(test_dataset, test_sampler)]

    if args.extract: # feature extraction
        args.extract_info.batch_size *= args.ngpu
        #extract_dataset = FaceDataset(args, 0, 'extract')
        extract_dataset = FileListDataset(
            args.extract_info.data_list, args.extract_info.data_root,
            transforms.Compose([
                transforms.Resize(args.model.input_size),
                transforms.ToTensor(),
                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),]),
            memcached=args.memcached,
            memcached_client=args.memcached_client)
        extract_sampler = GivenSizeSampler(
            extract_dataset, total_size=int(np.ceil(len(extract_dataset) / float(args.extract_info.batch_size)) * args.extract_info.batch_size), sequential=True)
        extract_loader = DataLoader(
            extract_dataset, batch_size=args.extract_info.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=False, sampler=extract_sampler)


    ## create model
    log("Creating model on [{}] gpus: {}".format(args.ngpu, args.gpus))
    if args.evaluate or args.extract:
        args.num_classes = None
    model = models.MultiTaskWithLoss(
        backbone=args.model.backbone, num_classes=args.num_classes,
        feature_dim=args.model.feature_dim, spatial_size=args.model.input_size,
        arc_fc=args.model.arc_fc, feat_bn=args.model.feat_bn)
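    # CUDA_VISIBLE_DEVICES only takes effect if set before the first CUDA call (model.cuda() below)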
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
    model = nn.DataParallel(model)
    model.cuda()
    cudnn.benchmark = True

    ## criterion and optimizer
    optimizer = torch.optim.SGD(model.parameters(), args.train.base_lr,
                                momentum=args.train.momentum,
                                weight_decay=args.train.weight_decay)

    ## resume / load model
    start_epoch = 0
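    # `count` is a one-element list so train() can advance the global iteration counter in place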
    count = [0]
    if args.load_path:
        assert os.path.isfile(args.load_path), "File not exist: {}".format(args.load_path)
        if args.resume:
            checkpoint = load_state(args.load_path, model, optimizer)
            start_epoch = checkpoint['epoch']
            count[0] = checkpoint['count']
        else:
            load_state(args.load_path, model)

    ## offline evaluate
    if args.evaluate:
        for tb, tl, td in zip(args.test.benchmark, test_loader, test_dataset):
            evaluation(tl, model, num=len(td),
                       outfeat_fn="{}_{}.bin".format(args.load_path[:-8], tb),
                       benchmark=tb)
        return

    ## feature extraction
    if args.extract:
        extract(extract_loader, model, num=len(extract_dataset), output_file="{}_{}.bin".format(args.load_path[:-8], args.extract_info.data_name))
        return

    ######################## train #################
    ## lr scheduler
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, args.train.lr_decay_steps, gamma=args.train.lr_decay_scale, last_epoch=start_epoch-1)
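    # last_epoch=start_epoch-1 resumes the decay schedule; step() is then called at the
    # top of each epoch, following the older PyTorch scheduler convention used here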

    ## logger
    logging.basicConfig(filename=os.path.join('{}/logs'.format(args.save_path), 'log-{}-{:02d}-{:02d}_{:02d}:{:02d}:{:02d}.txt'.format(
        datetime.today().year, datetime.today().month, datetime.today().day,
        datetime.today().hour, datetime.today().minute, datetime.today().second)),
        level=logging.INFO)
    tb_logger = SummaryWriter('{}/events'.format(args.save_path))

    ## initial validate
    if args.val.flag:
        validate(val_loader, model, start_epoch, args.train.loss_weight, len(train_loader[0]), tb_logger)

    ## initial evaluate
    if args.test.flag and args.test.initial_test:
        log("*************** evaluation epoch [{}] ***************".format(start_epoch))
        for tb, tl, td in zip(args.test.benchmark, test_loader, test_dataset):
            res = evaluation(tl, model, num=len(td),
                             outfeat_fn="{}/checkpoints/ckpt_epoch_{}_{}.bin".format(
                             args.save_path, start_epoch, tb),
                             benchmark=tb)
            tb_logger.add_scalar(tb, res, start_epoch)

    ## training loop
    for epoch in range(start_epoch, args.train.max_epoch):
        lr_scheduler.step()
        for ts in train_sampler:
            ts.set_epoch(epoch)
        # train for one epoch
        train(train_loader, model, optimizer, epoch, args.train.loss_weight, tb_logger, count)
        # save checkpoint
        save_state({
            'epoch': epoch + 1,
            'arch': args.model.backbone,
            'state_dict': model.state_dict(),
            'optimizer' : optimizer.state_dict(),
            'count': count[0]
        }, args.save_path + "/checkpoints/ckpt_epoch", epoch + 1, is_last=(epoch + 1 == args.train.max_epoch))

        # validate
        if args.val.flag:
            validate(val_loader, model, epoch, args.train.loss_weight, len(train_loader[0]), tb_logger, count)
        # online evaluate
        if args.test.flag and ((epoch + 1) % args.test.interval == 0 or epoch + 1 == args.train.max_epoch):
            log("*************** evaluation epoch [{}] ***************".format(epoch + 1))
            for tb, tl, td in zip(args.test.benchmark, test_loader, test_dataset):
                res = evaluation(tl, model, num=len(td),
                                 outfeat_fn="{}/checkpoints/ckpt_epoch_{}_{}.bin".format(
                                 args.save_path, epoch + 1, tb),
                                 benchmark=tb)
                tb_logger.add_scalar(tb, res, epoch + 1)