def split_data(root_dir, domain, r_val, r_data, rand_seed):
    """Build train / validation / test splits for a single domain.

    The two list files ``<root_dir>/<domain>/<domain>_test.txt`` and
    ``<root_dir>/<domain>/<domain>_train.txt`` are parsed with ``read``; each
    path entry it returns is then joined onto ``root_dir``.

    Args:
        root_dir: Dataset root directory (anything ``Path`` accepts).
        domain: Name of the domain sub-directory and list-file prefix.
        r_val: Passed to ``data.split_data`` to carve the validation subset
            out of the training list. ``None`` skips that split and reuses
            the test set as the validation set.
        r_data: Passed to ``data.split_data`` to pick the training subset
            from whatever is left after the validation split.
            NOTE(review): presumably a ratio or a count -- confirm against
            ``data.split_data``.
        rand_seed: Seed forwarded to every ``data.split_data`` call.

    Returns:
        Tuple ``(x, y, xv, yv, xt, yt)``: training, validation and test
        path arrays, each paired with the second array that ``read`` /
        ``data.split_data`` returned for it (presumably the labels).
    """
    root = Path(root_dir)

    def with_root(rel_paths):
        # Re-anchor every listed entry under the dataset root.
        return np.array([root / rel for rel in rel_paths.tolist()])

    # test
    test_rel, yt = read(root / domain / f'{domain}_test.txt')
    xt = with_root(test_rel)

    # pool that validation and training samples are drawn from
    pool_x, pool_y = read(root / domain / f'{domain}_train.txt')

    # val
    if r_val is None:
        xv, yv = xt, yt
    else:
        (val_rel, yv), (pool_x, pool_y) = data.split_data(
            pool_x.copy(), pool_y.copy(), rand_seed, r_val)
        xv = with_root(val_rel)

    # train
    train_rel, y = data.split_data(
        pool_x.copy(), pool_y.copy(), rand_seed, r_data)[0]
    x = with_root(train_rel)

    return x, y, xv, yv, xt, yt
def split_data(root_dir, tgt_domains, src_domains, r_val, r_lab, r_unlab,
               w_unlab, rand_seed, r_data=None):
    """Load the SVHN ``.mat`` files and split them into labelled /
    unlabelled / validation / test sets.

    Args:
        root_dir: Directory holding ``test_32x32.mat`` and
            ``train_32x32.mat``.
        tgt_domains: Unused by this loader.
        src_domains: Unused by this loader.
        r_val: Passed to ``data.split_data`` to carve a validation subset
            out of the training data. ``None`` reuses the test set as the
            validation set.
        r_lab: Passed to ``data.split_data`` to separate the labelled part
            from the unlabelled remainder.
        r_unlab: Unused by this loader.
        w_unlab: Unused by this loader.
        rand_seed: Seed forwarded to every ``data.split_data`` call.
        r_data: When not ``None``, the unlabelled set is reduced by one
            more ``data.split_data`` call using this value.

    Returns:
        Tuple ``(xl, yl, xu, xv, yv, xt, yt)``. The labels of the
        unlabelled part are not returned.
    """
    root = Path(root_dir)

    def load_mat(file_name):
        # 'X' has its last axis moved to the front; 'y' is flattened and
        # shifted to start at zero.
        mat = sio.loadmat(root / file_name)
        images = np.transpose(mat['X'], (3, 0, 1, 2))
        labels = mat['y'].reshape(-1).astype(int) - 1  # SVHN labels are 1-10
        return images, labels

    # test
    xt, yt = load_mat('test_32x32.mat')

    # val, lab, unlab
    x, y = load_mat('train_32x32.mat')
    if r_val is None:
        xv, yv = xt, yt
    else:
        held_out, remainder = data.split_data(
            x.copy(), y.copy(), rand_seed, r_val)
        xv, yv = held_out
        x, y = remainder

    labelled, unlabelled = data.split_data(
        x.copy(), y.copy(), rand_seed, r_lab)
    xl, yl = labelled
    xu, yu = unlabelled

    # reduce data
    if r_data is not None:
        xu, yu = data.split_data(
            xu.copy(), yu.copy(), rand_seed, r_data)[0]

    return xl, yl, xu, xv, yv, xt, yt
def split_data(root_dir, tgt_domains, src_domains, r_val, r_lab, r_unlab,
               w_unlab, rand_seed, r_data=None):
    """Load the pickled ``test_batch`` / ``data_batch_1..5`` files and split
    them into labelled / unlabelled / validation / test sets.

    Each batch's ``'data'`` entry is reshaped to ``(-1, 3, 32, 32)`` and
    transposed to channel-last ``(N, 32, 32, 3)``; its ``'labels'`` entry
    becomes an integer array.

    Args:
        root_dir: Directory containing the batch files.
        tgt_domains: Unused by this loader.
        src_domains: Unused by this loader.
        r_val: Passed to ``data.split_data`` to carve a validation subset
            out of the training data. ``None`` reuses the test set as the
            validation set.
        r_lab: Passed to ``data.split_data`` to separate the labelled part
            from the unlabelled remainder.
        r_unlab: Unused by this loader.
        w_unlab: Unused by this loader.
        rand_seed: Seed forwarded to every ``data.split_data`` call.
        r_data: When not ``None``, the unlabelled set is reduced by one
            more ``data.split_data`` call using this value.

    Returns:
        Tuple ``(xl, yl, xu, xv, yv, xt, yt)``. The labels of the
        unlabelled part are not returned.
    """
    root_dir = Path(root_dir)

    def load_batch(path):
        # SECURITY NOTE: unpickling can execute arbitrary code; only point
        # this loader at trusted dataset files.
        # The context manager closes the handle, which the previous
        # ``pickle.load(open(...))`` form never did.
        with open(path, 'rb') as fh:
            batch = pickle.load(fh, encoding='latin1')
        images = np.transpose(
            batch['data'].reshape((-1, 3, 32, 32)), (0, 2, 3, 1))
        # ``np.int`` was removed in NumPy 1.24; builtin ``int`` is the same
        # dtype the alias used to resolve to.
        labels = np.array(batch['labels'], dtype=int)
        return images, labels

    # test
    xt, yt = load_batch(root_dir / 'test_batch')

    # val, lab, unlab
    train_batches = [
        load_batch(root_dir / f'data_batch_{i}') for i in range(1, 6)
    ]
    x = np.concatenate([images for images, _ in train_batches])
    y = np.concatenate([labels for _, labels in train_batches])

    if r_val is not None:
        (xv, yv), (x, y) = data.split_data(
            x.copy(), y.copy(), rand_seed, r_val)
    else:
        xv, yv = xt, yt
    (xl, yl), (xu, yu) = data.split_data(
        x.copy(), y.copy(), rand_seed, r_lab)

    # reduce data
    if r_data is not None:
        xu, yu = data.split_data(
            xu.copy(), yu.copy(), rand_seed, r_data)[0]

    return xl, yl, xu, xv, yv, xt, yt
def split_data(root_dir, tgt_domains, src_domains, r_val, r_lab, r_unlab,
               w_unlab, rand_seed, r_data=None):
    """Load the ``x/y{test,train}.npy`` arrays and split them into
    labelled / unlabelled / validation / test sets.

    Class names are mapped to consecutive integers in the order given by
    ``np.unique`` over the test labels. The training arrays are shuffled
    with ``data.shuffle_data`` and truncated to their first 50000 entries
    before any split is made.

    Args:
        root_dir: Directory holding the four ``.npy`` files.
        tgt_domains: Unused by this loader.
        src_domains: Unused by this loader.
        r_val: Passed to ``data.split_data`` to carve a validation subset
            out of the training data. ``None`` reuses the test set as the
            validation set.
        r_lab: Passed to ``data.split_data`` to separate the labelled part
            from the unlabelled remainder.
        r_unlab: Unused by this loader.
        w_unlab: Unused by this loader.
        rand_seed: Seed for the shuffle and every ``data.split_data`` call.
        r_data: When not ``None``, the unlabelled set is reduced by one
            more ``data.split_data`` call using this value.

    Returns:
        Tuple ``(xl, yl, xu, xv, yv, xt, yt)``. The labels of the
        unlabelled part are not returned.
    """
    root = Path(root_dir)

    # test
    xt = np.load(root / 'xtest.npy')
    raw_test_labels = np.load(root / 'ytest.npy')

    # The class vocabulary comes from the test labels only; a training
    # label missing from it raises KeyError in ``encode``.
    classes = np.unique(raw_test_labels)
    name2num = dict(zip(classes, np.arange(len(classes))))

    def encode(names):
        return np.array([name2num[name] for name in names])

    yt = encode(raw_test_labels)

    # val, lab, unlab
    x = np.load(root / 'xtrain.npy')
    y = np.load(root / 'ytrain.npy')
    x, y = data.shuffle_data([x, y], rand_seed)
    x = x[:50000]
    y = encode(y[:50000])

    if r_val is None:
        xv, yv = xt, yt
    else:
        held_out, remainder = data.split_data(
            x.copy(), y.copy(), rand_seed, r_val)
        xv, yv = held_out
        x, y = remainder

    labelled, unlabelled = data.split_data(
        x.copy(), y.copy(), rand_seed, r_lab)
    xl, yl = labelled
    xu, yu = unlabelled

    # reduce data
    if r_data is not None:
        xu, yu = data.split_data(
            xu.copy(), yu.copy(), rand_seed, r_data)[0]

    return xl, yl, xu, xv, yv, xt, yt
def split_data(root_dir, tgt_domains, src_domains, r_val, r_lab, r_unlab,
               w_unlab, rand_seed, r_data=None):
    """Load the pickled ``test`` / ``train`` files and split them into
    labelled / unlabelled / validation / test sets.

    Each file's ``'data'`` entry is reshaped to ``(-1, 3, 32, 32)`` and
    transposed to channel-last ``(N, 32, 32, 3)``; its ``'fine_labels'``
    entry becomes an integer array.

    Args:
        root_dir: Directory containing the ``test`` and ``train`` files.
        tgt_domains: Unused by this loader.
        src_domains: Unused by this loader.
        r_val: Passed to ``data.split_data`` to carve a validation subset
            out of the training data. ``None`` reuses the test set as the
            validation set.
        r_lab: Passed to ``data.split_data`` to separate the labelled part
            from the unlabelled remainder.
        r_unlab: Unused by this loader.
        w_unlab: Unused by this loader.
        rand_seed: Seed forwarded to every ``data.split_data`` call.
        r_data: When not ``None``, the unlabelled set is reduced by one
            more ``data.split_data`` call using this value.

    Returns:
        Tuple ``(xl, yl, xu, xv, yv, xt, yt)``. The labels of the
        unlabelled part are not returned.
    """
    root_dir = Path(root_dir)

    def load_file(path):
        # SECURITY NOTE: unpickling can execute arbitrary code; only point
        # this loader at trusted dataset files.
        # The context manager closes the handle, which the previous
        # ``pickle.load(open(...))`` form never did.
        with open(path, 'rb') as fh:
            batch = pickle.load(fh, encoding='latin1')
        images = np.transpose(
            batch['data'].reshape((-1, 3, 32, 32)), (0, 2, 3, 1))
        # ``np.int`` was removed in NumPy 1.24; builtin ``int`` is the same
        # dtype the alias used to resolve to.
        labels = np.array(batch['fine_labels'], dtype=int)
        return images, labels

    # test
    xt, yt = load_file(root_dir / 'test')

    # val, lab, unlab
    x, y = load_file(root_dir / 'train')

    if r_val is not None:
        (xv, yv), (x, y) = data.split_data(
            x.copy(), y.copy(), rand_seed, r_val)
    else:
        xv, yv = xt, yt
    (xl, yl), (xu, yu) = data.split_data(
        x.copy(), y.copy(), rand_seed, r_lab)

    # reduce data
    if r_data is not None:
        xu, yu = data.split_data(
            xu.copy(), yu.copy(), rand_seed, r_data)[0]

    return xl, yl, xu, xv, yv, xt, yt
def split_data(root_dir, tgt_domains, src_domains, r_val, r_lab, r_unlab,
               w_unlab, rand_seed, r_data):
    """Build labelled / unlabelled / validation / test splits from several
    target domains, topping up the unlabelled set with source-domain data.

    For every target domain the training list is split, in order, into an
    optional validation part (``r_val``), a labelled part (``r_lab``) and
    an unlabelled remainder. That remainder is split once more with
    ``1. - r_unlab``: the first part stays in the unlabelled set, and the
    size of the second part is accumulated. Each source domain then
    contributes ``round(total * w_unlab[i])`` samples to the unlabelled
    set, where ``w_unlab`` is first normalised to sum to one.

    Args:
        root_dir: Dataset root; list files live at
            ``<root_dir>/<domain>/<domain>_{train,test}.txt`` and every
            entry they list is joined onto ``root_dir``.
        tgt_domains: Target-domain names.
        src_domains: Source-domain names; must be disjoint from
            ``tgt_domains``.
        r_val: Validation split argument for ``data.split_data``.
            ``None`` reuses the test set as the validation set.
        r_lab: Labelled split argument for ``data.split_data``.
        r_unlab: ``1. - r_unlab`` is the split argument that decides how
            much of each target domain's unlabelled remainder is kept.
        w_unlab: Per-source-domain weights, indexed like ``src_domains``.
        rand_seed: Seed forwarded to every ``data.split_data`` call.
        r_data: When not ``None``, both the labelled and the unlabelled
            sets are reduced by one more ``data.split_data`` call each.

    Returns:
        Tuple ``(xl, yl, xu, xv, yv, xt, yt)``. The labels of the
        unlabelled part are not returned.

    Raises:
        AttributeError: If ``tgt_domains`` and ``src_domains`` overlap.
    """
    root_dir = Path(root_dir)
    w_unlab = np.array(w_unlab) / np.sum(w_unlab)
    if set(tgt_domains) & set(src_domains):
        # Same exception type as before, but the explanation now travels
        # with the exception instead of being printed to stdout.
        raise AttributeError(
            'tgt_domains should not overlap with src_domains')

    def with_root(rel_paths):
        # Re-anchor every listed entry under the dataset root.
        return np.array([root_dir / rel for rel in rel_paths])

    # target test
    xt, yt = [], []
    for domain in tgt_domains:
        xd, yd = read(root_dir / domain / f'{domain}_test.txt')
        xt.extend(xd.tolist())
        yt.extend(yd.tolist())
    xt, yt = with_root(xt), np.array(yt)

    # target val, target lab, target unlab
    xv, yv, xl, yl, xu, yu = [], [], [], [], [], []
    n_dropped = 0  # target-unlabelled samples to replace with source data
    for domain in tgt_domains:
        xd, yd = read(root_dir / domain / f'{domain}_train.txt')
        # target val
        if r_val is not None:
            (xvd, yvd), (xd, yd) = data.split_data(
                xd.copy(), yd.copy(), rand_seed, r_val)
            xv.extend(xvd.tolist())
            yv.extend(yvd.tolist())
        # target lab
        (xld, yld), (xud, yud) = data.split_data(
            xd.copy(), yd.copy(), rand_seed, r_lab)
        xl.extend(xld.tolist())
        yl.extend(yld.tolist())
        # target unlab
        (xdu, ydu), (xres, _) = data.split_data(
            xud.copy(), yud.copy(), rand_seed, 1. - r_unlab)
        xu.extend(xdu.tolist())
        yu.extend(ydu.tolist())
        n_dropped += len(xres)

    if r_val is not None:
        xv, yv = with_root(xv), np.array(yv)
    else:
        xv, yv = xt, yt
    xl, yl = with_root(xl), np.array(yl)

    # source unlab
    for i, domain in enumerate(src_domains):
        xd, yd = read(root_dir / domain / f'{domain}_train.txt')
        # NOTE(review): an integer is passed here where the other calls
        # pass r_* values -- presumably data.split_data accepts a sample
        # count as well as a ratio; confirm.
        n_from_domain = int(round(n_dropped * w_unlab[i]))
        xd, yd = data.split_data(
            xd.copy(), yd.copy(), rand_seed, n_from_domain)[0]
        xu.extend(xd.tolist())
        yu.extend(yd.tolist())
    xu, yu = with_root(xu), np.array(yu)

    # reduce data
    if r_data is not None:
        xl, yl = data.split_data(
            xl.copy(), yl.copy(), rand_seed, r_data)[0]
        xu, yu = data.split_data(
            xu.copy(), yu.copy(), rand_seed, r_data)[0]

    return xl, yl, xu, xv, yv, xt, yt