def get_preprocessor(preproc_str, preproc_cachefile):
    if preproc_str:
        if not preproc_cachefile or not os.path.exists(preproc_cachefile):
            preproc = train_preprocessor(preproc_str)
            if preproc_cachefile:
                print("store", preproc_cachefile)
                faiss.write_VectorTransform(preproc, preproc_cachefile)
        else:
            print("load", preproc_cachefile)
            preproc = faiss.read_VectorTransform(preproc_cachefile)
    else:
        preproc = IdentPreproc(dim)
    return preproc
def get_preprocessor():
    if preproc_str:
        if not preproc_cachefile or not os.path.exists(preproc_cachefile):
            preproc = train_preprocessor()
            if preproc_cachefile:
                print("store", preproc_cachefile)
                faiss.write_VectorTransform(preproc, preproc_cachefile)
        else:
            print("load", preproc_cachefile)
            preproc = faiss.read_VectorTransform(preproc_cachefile)
    else:
        d = xb.shape[1]
        preproc = IdentPreproc(d)
    return preproc
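# Hedged sketch: IdentPreproc is referenced by both get_preprocessor variants
# above but not defined in these snippets. A minimal definition consistent
# with that usage is a no-op stand-in exposing the same interface as a
# trained faiss.VectorTransform:
class IdentPreproc:
    """Identity preprocessor with the same interface as a trained transform."""

    def __init__(self, d):
        self.d_in = self.d_out = d

    def apply_py(self, x):
        # return the vectors unchanged
        return x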
def zipBinaryTrainingParams(self, preproc, coarse_quantizer, codesIndex):
    faiss.write_index(codesIndex, 'tmp1')
    # faiss.write_ProductQuantizer(coarse_quantizer, 'tmp2')
    faiss.write_VectorTransform(preproc, 'tmp3')
    with open('tmp1', 'r+b') as fp:
        bin_index = fp.read()
    # with open('tmp2', 'r+b') as fp:
    #     bin_coarsequantizer = fp.read()
    with open('tmp3', 'r+b') as fp:
        bin_preproc = fp.read()
    index_length = ("%012d" % len(bin_index)).encode('ascii')
    # quantizer_length = ("%012d" % len(bin_coarsequantizer)).encode('ascii')
    preproc_length = ("%012d" % len(bin_preproc)).encode('ascii')
    data = index_length + bin_index + preproc_length + bin_preproc
    return data
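# Hedged sketch (not in the original class): one way to unpack the blob built
# by zipBinaryTrainingParams, assuming the same 12-digit ASCII length prefixes.
# The function name and the temporary file paths are hypothetical.
def unzipBinaryTrainingParams(data):
    def take(buf, offset):
        length = int(buf[offset:offset + 12])      # 12-digit ASCII length prefix
        start = offset + 12
        return buf[start:start + length], start + length

    bin_index, offset = take(data, 0)
    bin_preproc, _ = take(data, offset)
    # faiss deserializes from files, so write the blobs back to disk first
    with open('tmp_index', 'wb') as fp:
        fp.write(bin_index)
    with open('tmp_preproc', 'wb') as fp:
        fp.write(bin_preproc)
    codesIndex = faiss.read_index('tmp_index')
    preproc = faiss.read_VectorTransform('tmp_preproc')
    return preproc, codesIndex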
def train_preprocessor(self, preproc_str_local, xt_local):
    if not self.preproc_cachefile or not os.path.exists(self.preproc_cachefile):
        print("train preproc", preproc_str_local)
        d = xt_local.shape[1]
        t0 = time.time()
        if preproc_str_local.startswith('OPQ'):
            # e.g. 'OPQ16_64': m sub-quantizers, optional output dimension
            fi = preproc_str_local[3:].split('_')
            m = int(fi[0])
            dout = int(fi[1]) if len(fi) == 2 else d
            preproc = faiss.OPQMatrix(d, m, dout)
        elif preproc_str_local.startswith('PCAR'):
            # e.g. 'PCAR256,': the last character (trailing separator) is dropped
            dout = int(preproc_str_local[4:-1])
            preproc = faiss.PCAMatrix(d, dout, 0, True)
        else:
            assert False
        preproc.train(indexfunctions.sanitize(xt_local[:100000000]))
        print("preproc train done in %.3f s" % (time.time() - t0))
        faiss.write_VectorTransform(preproc, self.preproc_cachefile)
    else:
        print("load preproc ", self.preproc_cachefile)
        preproc = faiss.read_VectorTransform(self.preproc_cachefile)
    return preproc
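# Hedged usage sketch for train_preprocessor above (not in the original code):
# 'OPQ16_64' yields an OPQMatrix with 16 sub-quantizers projecting to 64 dims;
# 'PCAR256,' yields a 256-dim PCA with random rotation, assuming the string
# carries a trailing separator that the [4:-1] slice strips. `xt` stands in
# for a float32 training matrix.
#
#   preproc = self.train_preprocessor('OPQ16_64', xt)
#   xt_reduced = preproc.apply_py(indexfunctions.sanitize(xt))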
def savemat(self, path):
    faiss.write_VectorTransform(self.mat, path)
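# Hedged usage sketch: round-tripping a trained transform through
# write_VectorTransform / read_VectorTransform, which savemat above relies on.
# The dimensions and the filename are arbitrary illustration values.
import faiss
import numpy as np

d_in, d_out = 128, 32
x = np.random.rand(10000, d_in).astype('float32')

pca = faiss.PCAMatrix(d_in, d_out)
pca.train(x)
faiss.write_VectorTransform(pca, 'pca_128_to_32.vt')   # hypothetical path

pca2 = faiss.read_VectorTransform('pca_128_to_32.vt')
x_reduced = pca2.apply_py(x)                           # (10000, 32) float32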
def serializeIndex(self, indexFilePath=None, mmapPath=None):
    import mmap
    index_tmp_path = 'tmpIndex_%03d' % self.machine_num
    if indexFilePath is None:
        indexFilePath = index_tmp_path
    faiss.write_index(self.index, index_tmp_path)
    with open(indexFilePath, 'r+b') as fp:
        # bin_index = fp.read()
        bin_index_map = mmap.mmap(fp.fileno(), 0)
    with open(self.emptyIndexPath, 'r+b') as fp:
        # bin_codes = fp.read()
        bin_codes_map = mmap.mmap(fp.fileno(), 0)
    if not os.path.exists(self.preproc_cachefile):
        faiss.write_VectorTransform(self.preproc, self.preproc_cachefile)
    with open(self.preproc_cachefile, 'r+b') as fp:
        # bin_preproc = fp.read()
        bin_preproc_map = mmap.mmap(fp.fileno(), 0)
    if mmapPath is not None:
        print('saving ivf mmap data to binary...')
        with open(mmapPath, 'r+b') as fp:
            bin_ivf_mmap_map = mmap.mmap(fp.fileno(), 0)
        ivf_length = ("%012d" % len(bin_ivf_mmap_map)).encode('ascii')
        ivf_mmap_path = mmapPath.encode('ascii')
        ivf_mmap_path_length = ("%012d" % len(ivf_mmap_path)).encode('ascii')
    bin_IDtoNameMap = json.dumps(self.IDtoNameMap).encode('ascii')
    index_length = ("%012d" % len(bin_index_map)).encode('ascii')
    codes_length = ("%012d" % len(bin_codes_map)).encode('ascii')
    preproc_length = ("%012d" % len(bin_preproc_map)).encode('ascii')
    map_length = ("%012d" % len(bin_IDtoNameMap)).encode('ascii')
    totalLength = len(index_length) + len(bin_index_map) + len(codes_length) + \
        len(bin_codes_map) + len(preproc_length) + len(bin_preproc_map) + \
        len(map_length) + len(bin_IDtoNameMap)
    if mmapPath is not None:
        totalLength += len(ivf_length) + len(bin_ivf_mmap_map) + \
            len(ivf_mmap_path_length) + len(ivf_mmap_path)
    print('creating final binary file of size ', totalLength / 1024 / 1024, ' MB')
    with open('tmp_binary_index.dat', 'wb') as fp:
        # final_index_bin_map = mmap.mmap(fp.fileno(), totalLength)
        # final_index_bin_map.seek(0)
        print('writing binary to mmapped file...')
        fp.write(index_length)
        fp.write(bin_index_map)
        fp.write(codes_length)
        fp.write(bin_codes_map)
        fp.write(preproc_length)
        fp.write(bin_preproc_map)
        fp.write(map_length)
        fp.write(bin_IDtoNameMap)
        if mmapPath is not None:
            print('writing ivf mmap data')
            print('length: ', ivf_mmap_path_length)
            fp.write(ivf_mmap_path_length)
            print('path: ', ivf_mmap_path)
            fp.write(ivf_mmap_path)
            print('length: ', ivf_length)
            fp.write(ivf_length)
            print('map size: ', len(bin_ivf_mmap_map))
            fp.write(bin_ivf_mmap_map)
    print('Memory mapping final index file...')
    with open('tmp_binary_index.dat', 'r+b') as fp:
        final_index_bin_map = mmap.mmap(fp.fileno(), 0)
    print('returning final binary')
    # final_index_bin = index_length + bin_index_map[:] + codes_length + bin_codes_map[:] + \
    #     preproc_length + bin_preproc_map[:] + map_length + bin_IDtoNameMap
    return final_index_bin_map[:]
train_subset[subset_i:subset_i + n_features] = features[:n_features]
# for n_feature in range(n_features):
#     index_dict[subset_i + n_feature] = int(label)
subset_i += n_features

if pca:
    if os.path.exists(INDEX_FILENAME_PCA):
        mat = faiss.read_VectorTransform(INDEX_FILENAME_PCA)
    else:
        mat = faiss.PCAMatrix(FEATURES_NUMBER, PCA_FEATURES)
        print("PCA training... started")
        mat.train(train_subset)
        print("PCA training... finished")
        faiss.write_VectorTransform(mat, INDEX_FILENAME_PCA)

if pca:
    print("PCA transformation... started")
    train_subset = mat.apply_py(train_subset) if pca else train_subset
    print("PCA transformation... finished")

cpu_index = faiss.IndexFlatL2(PCA_FEATURES if pca else FEATURES_NUMBER)
# cpu_index = faiss.index_factory(PCA_FEATURES if pca else FEATURES_NUMBER, "IVF4096,Flat")
index = faiss.index_cpu_to_gpu(res, 0, cpu_index, co) if gpu else cpu_index

# nlist = 1000
if train:
    print("Training index... started")
    # quantizer = faiss.IndexFlatL2(FEATURES_NUMBER)  # the other index
    # index = faiss.IndexIVFFlat(quantizer, FEATURES_NUMBER, nlist, faiss.METRIC_L2)  # faster, uses more memory
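# Hedged continuation sketch (not part of the fragment above): once the index
# is trained, vectors are added and queried with the standard faiss calls.
# `query_features` is a hypothetical float32 array with the same
# dimensionality as train_subset.
#
#   index.train(train_subset)                 # no-op for IndexFlatL2, needed for IVF indexes
#   index.add(train_subset)
#   distances, neighbors = index.search(query_features, 5)   # 5 nearest neighbors per query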
def export(args, model, dataloader, dataset):
    # remove head
    model.top_layer = None
    model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])

    # get the features for the whole dataset
    features, idxs, pos1 = dc_main.compute_features(dataloader, model, len(dataset), args)

    # sort features by dataset index (compute the order once so both arrays
    # are permuted consistently)
    order = np.argsort(idxs)
    idxs = idxs[order]
    features = features[order]

    if args.group > 1:
        args.group = args.ep_length - args.traj_length + 1

    # clustering algorithm to use
    deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster, group=args.group)

    # cluster the features
    clustering_loss = deepcluster.cluster(features, verbose=args.verbose)
    centroids = deepcluster.clus.centroids

    # centroids = faiss.vector_float_to_array(deepcluster.clus.get_means_and_variances)
    # centroids = centroids.reshape(nmb_cluster, 256)
    # import pdb; pdb.set_trace()
    # self_index = faiss.IndexFlatL2(centroids.shape[1])   # build the index
    # self_index.add(centroids)
    # self_dists = self_index.search(centroids, centroids.shape[0])

    _, (mean, std), _, _ = vis_utils.make_transform(args.data)
    model.features = model.features.module

    c_mean, c_cov, c_var = get_means_and_variances(deepcluster, features, args)

    resume = args.resume if len(args.resume) > 0 else args.exp
    out = {
        'state_dict': model.state_dict(),
        'centroids': centroids,
        'pca_path': resume + '.pca',
        'mean': mean,
        'std': std,
        # 'cluster_mean': c_mean,
        'cluster_cov': c_cov,
        'clus': deepcluster.clus,
    }

    if args.export > 0:
        faiss.write_VectorTransform(deepcluster.mat, resume + '.pca')
        torch.save(out, resume + '.clus')

    out['pca'] = deepcluster.mat

    T = args.traj_length
    pos = pos1
    if sum(sum(pos)) == 0:
        meta = torch.load('%s/meta.dict' % args.data)
        pos = np.array(meta['pos'])
        pos_idx = np.arange(pos.shape[0] * pos.shape[1])
        pos_idx = pos_idx.reshape(pos.shape[0], pos.shape[1])[:, T - 1:]
        pos_idx = pos_idx.reshape(pos_idx.shape[0] * pos_idx.shape[1])
        pos = pos.reshape(pos.shape[0] * pos.shape[1], pos.shape[2])
    else:
        meta = torch.load('/data3/ajabri/vizdoom/single_env_hard_fixed1/0/meta.dict')

    # import pdb; pdb.set_trace()
    sz = 30
    from scipy.ndimage.filters import gaussian_filter

    def get_obj_masks(objs):
        out = np.zeros((3, sz, sz))
        for o in objs[0]:
            # import pdb; pdb.set_trace()
            x, y = o
            x, y = int((x - x0) / x1 * sz), int((y - y0) / y1 * sz)
            out[:, x:x + 1, y:y + 1] = 1
        return out

    def get_mask_from_coord(coord):
        import matplotlib.cm as cm
        x, y, a = coord
        x, y = int(x), int(y)
        out = np.zeros((3, sz, sz))
        out[:, x, y] = cm.jet(a)[:3]
        return out

    # import pdb; pdb.set_trace()
    # sorted_self_dists = np.argsort(self_dists[0][:, 1])[::-1]
    # sorted_self_dists = np.argsort(self_dists[0].sum(axis=-1))[::-1]

    smoother1 = models.mini_models.GaussianSmoothing(3, 5, 5)
    smoother2 = models.mini_models.GaussianSmoothing(3, 7, 5)
    smoother3 = models.mini_models.GaussianSmoothing(3, 7, 7)
    smoother4 = models.mini_models.GaussianSmoothing(3, 9, 7)

    exp_name = args.resume.split('/')[-2] if args.resume != '' else args.exp.split('/')[-1]
    out_root = '%s/%s' % (args.export_path, exp_name)

    # import pdb; pdb.set_trace()
    if not os.path.exists(out_root):
        os.makedirs(out_root)

    table = Table()
    num_show = 8

    sorted_variance = np.argsort(c_var)[::-1]
    sorted_sizes = np.argsort([len(ll) for ll in deepcluster.images_dists])[::-1]

    # import pdb; pdb.set_trace()
    # for c, clus_idx in enumerate(sorted_variance):
    # for c, clus_idx in enumerate(sorted_self_dists):
    for c, clus_idx in enumerate(sorted_sizes):
        l = deepcluster.images_dists[clus_idx]
        if len(l) == 0:
            continue

        ll = [ii[0] for ii in sorted(l, key=lambda x: x[1])[::-1]][:num_show // 2]
        ll += [ii[0] for ii in random.sample(l, min(num_show // 2, len(l)))]
        l = [ii[0] for ii in l]

        row = TableRow(rno=c)

        e = Element()
        e.addTxt('size: %s <br>variance: %s' % (len(deepcluster.images_dists[clus_idx]), c_var[clus_idx]))
        row.addElement(e)

        # import pdb; pdb.set_trace()

        ## MAP
        poo = []
        for t in range(T):
            poo += [pos[np.array(l) - t]]

        posum = env.make_pose_map(np.concatenate(poo), meta['objs'][0], sz=sz)
        # posum *= 255.0
        # vis.image((posum*255.).astype(np.uint8), opts=dict(width=300, height=300))
        # vis.image(gaussian_filter((posum*255.), sigma=1).astype(np.uint8), opts=dict(width=300, height=300))

        # gifname = '%s/%s_%s.png' % (exp_name, c, 'map')
        gifname = '%s_%s.png' % (c, 'map')
        gifpath = '%s/%s' % (out_root, gifname)
        imageio.imwrite(gifpath,
                        cv2.resize((posum * 255.).astype(np.uint8).transpose(1, 2, 0),
                                   (0, 0), fx=5, fy=5, interpolation=cv2.INTER_AREA))

        e = Element()
        e.addImg(gifname, width=180)
        row.addElement(e)

        ## EXEMPLARS
        for iii, i in enumerate(ll):
            # import pdb; pdb.set_trace()
            imgs = vis_utils.unnormalize_batch(dataset[i][0], mean, std)
            # vis.images(imgs, opts=dict(title=f"{c} of length {len(l)}"))
            # vis.images(smoother1(torch.Tensor(imgs)).numpy(), opts=dict(title=f"{c} of length {len(l)}"))
            # vis.images(smoother2(torch.Tensor(imgs)).numpy(), opts=dict(title=f"{c} of length {len(l)}"))
            # vis.images(smoother3(torch.Tensor(imgs)).numpy(), opts=dict(title=f"{c} of length {len(l)}"))
            # vis.images(smoother4(torch.Tensor(imgs)).numpy(), opts=dict(title=f"{c} of length {len(l)}"))

            # gifname = '%s/%s_%s.gif' % (exp_name, c, i)
            gifname = '%s_%s.gif' % (c, i)
            gifpath = '%s/%s' % (out_root, gifname)
            vis_utils.make_gif_from_tensor(imgs.astype(np.uint8), gifpath)

            e = Element()
            if iii < num_show // 2:
                e.addTxt('rank %i<br>' % iii)
            else:
                e.addTxt('random<br>')
            e.addImg(gifname, width=128)
            row.addElement(e)

        ## EXEMPLARS (grouped variant, kept commented out)
        # import visdom
        # vis = visdom.Visdom(port=8095, env='main', use_incoming_socket=False)
        # vis.images(vis_utils.unnormalize_batch(
        #     np.stack([dataset[iii][0][0] for iii in range(-100, -50)]), mean, std
        # ))
        # import pdb; pdb.set_trace()
        # gl = np.array(l).reshape(-1, args.group)
        # if args.group > 10:
        #     exemplars = gl[random.sample(list(range(gl.shape[0])), 4)]
        # else:
        #     exemplars = gl[random.sample(list(range(gl.shape[0])), 10)]
        # for iii, i in enumerate(exemplars):
        #     # import pdb; pdb.set_trace()
        #     # imgs = vis_utils.unnormalize_batch(dataset[i][0], mean, std)
        #     imgs = np.stack([dataset[_idx][0][0] for _idx in i])
        #     imgs = vis_utils.unnormalize_batch(imgs, mean, std)
        #     # import pdb; pdb.set_trace()
        #     # imgs = vis_utils.unnormalize_batch(dataset[i][0], mean, std)
        #     # vis.images(imgs, opts=dict(title=f"{c} of length {len(l)}"))
        #     # vis.images(smoother1(torch.Tensor(imgs)).numpy(), opts=dict(title=f"{c} of length {len(l)}"))
        #     # vis.images(smoother2(torch.Tensor(imgs)).numpy(), opts=dict(title=f"{c} of length {len(l)}"))
        #     # vis.images(smoother3(torch.Tensor(imgs)).numpy(), opts=dict(title=f"{c} of length {len(l)}"))
        #     # vis.images(smoother4(torch.Tensor(imgs)).numpy(), opts=dict(title=f"{c} of length {len(l)}"))
        #     # gifname = '%s/%s_%s.gif' % (exp_name, c, i)
        #     gifname = '%s_%s.gif' % (c, i[0])
        #     gifpath = '%s/%s' % (out_root, gifname)
        #     vis_utils.make_gif_from_tensor(imgs.astype(np.uint8), gifpath)
        #     e = Element()
        #     if iii < num_show // 2:
        #         e.addTxt('rank %i<br>' % iii)
        #     else:
        #         e.addTxt('random<br>')
        #     e.addImg(gifname, width=128)
        #     row.addElement(e)

        table.addRow(row)

    tw = TableWriter(table, '%s/%s' % (args.export_path, exp_name),
                     rowsPerPage=min(args.nmb_cluster, 100))
    tw.write()
    # import pdb; pdb.set_trace()

    return out
def store_styleft():
    vgg_ = vgg16().cuda()
    for param in vgg_.parameters():
        param.requires_grad = False
    vgg_.top_layer = None
    encoder_path = '/home/visiting/Projects/levishery/checkpoint.pth.tar'
    data_path = '/home/visiting/datasets/crop_vangogh_original'

    if os.path.isfile(encoder_path):
        print("=> loading encoder '{}'".format(encoder_path))
        checkpoint = torch.load(encoder_path)
        # remove top_layer and classifier parameters from checkpoint
        for key in list(checkpoint['state_dict']):
            if 'top_layer' in key:
                del checkpoint['state_dict'][key]
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in checkpoint['state_dict'].items():
            if 'feature' in k:
                name = k[:8] + k[15:]  # remove `module.`
                new_state_dict[name] = v
            else:
                new_state_dict[k] = v
        vgg_.load_state_dict(new_state_dict)
        print("=> loaded checkpoint '{}' (epoch {})".format(encoder_path, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(encoder_path))

    # preprocessing of data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    tra = [transforms.Resize(240), transforms.ToTensor(), normalize]

    # load the data
    end = time.time()
    dataset = datasets.ImageFolder(data_path, transform=transforms.Compose(tra))
    print('Load dataset: {0:.2f} s'.format(time.time() - end))
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=16, num_workers=4)

    for i, (input_tensor, _) in enumerate(dataloader):
        input_var = torch.autograd.Variable(input_tensor.cuda(), volatile=True)
        features = vgg_.features(input_var)
        PATCH_NUM = 10
        features = features.unfold(2, 3, 3).unfold(3, 3, 3)
        features = features.permute(0, 2, 3, 1, 4, 5)
        x = features.reshape(features.size(0) * PATCH_NUM * PATCH_NUM, -1)
        x = vgg_.classifier(x).cpu().numpy()
        features = features.cpu().numpy()
        if i == 0:
            store_features = np.zeros((len(dataset.imgs), features.shape[1], features.shape[2],
                                       features.shape[3], features.shape[4],
                                       features.shape[5])).astype('float32')
            store_linear = np.zeros((len(dataset.imgs) * PATCH_NUM * PATCH_NUM,
                                     x.shape[1])).astype('float32')
        if i < len(dataloader) - 1:
            store_features[i * 16:(i + 1) * 16] = features.astype('float32')
            store_linear[i * 16 * PATCH_NUM * PATCH_NUM:(i + 1) * 16 * PATCH_NUM * PATCH_NUM] = x.astype('float32')
        else:
            # special treatment for final batch
            store_features[i * 16:] = features.astype('float32')
            store_linear[i * 16 * PATCH_NUM * PATCH_NUM:] = x.astype('float32')

    small_ft, pca = index_features(store_linear)
    faiss.write_VectorTransform(pca, "vangogh.pca")
    small_ft = small_ft.tolist()
    store_features = store_features.tolist()

    file_name = '/home/visiting/Projects/levishery/reconstruction/vangogh_index.json'
    print('start writing index')
    with open(file_name, 'w') as file_object:
        json.dump(small_ft, file_object)
    file_name = '/home/visiting/Projects/levishery/reconstruction/vangogh_features.json'
    print('start writing features')
    with open(file_name, 'w') as file_object:
        json.dump(store_features, file_object)
fv3_dir = os.getenv('DDIR') + '/features/'

if 'train' in todo:
    f = h5py.File(fv3_dir + 'f100m/block0.hdf5', 'r')
    count = f['count'][0]
    labels = f['all_labels'][:count]
    features = f['all_feats'][:count]

    pca = faiss.PCAMatrix(2048, 256, 0, True)
    pca.train(features)
    faiss.write_VectorTransform(pca, fv3_dir + 'PCAR256.vt')

if 'apply' in todo:
    pca = faiss.read_VectorTransform(fv3_dir + 'PCAR256.vt')

    def load_block(i):
        f = h5py.File(fv3_dir + 'f100m/block%d.hdf5' % i, 'r')
        count = f['count'][0]
        # labels = f['all_labels'][:count]
        features = f['all_feats'][:count]
        return features

    # one read thread, one PCA computation thread, and main thread writes result.
    src = rate_limited_imap(load_block, range(100))
    src2 = rate_limited_imap(pca.apply_py, src)

    f = open(fv3_dir + '/concatenated_PCAR256.raw', 'w')
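# Hedged sketch: rate_limited_imap is used above but not defined in this
# fragment. A minimal implementation consistent with that usage is a generator
# that maps f over l with a single item of threaded look-ahead, so the
# producer never runs more than one element ahead of the consumer.
from multiprocessing.pool import ThreadPool

def rate_limited_imap(f, l):
    pool = ThreadPool(1)
    res = None
    for i in l:
        res_next = pool.apply_async(f, (i,))
        if res is not None:
            yield res.get()
        res = res_next
    if res is not None:
        yield res.get()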
def store_styleft():
    global args
    args = parser.parse_args()

    # fix random seeds
    # torch.manual_seed(args.seed)
    # torch.cuda.manual_seed_all(args.seed)
    # np.random.seed(args.seed)

    # CNN
    if args.verbose:
        print('Architecture: {}'.format(args.arch))
    model = models.__dict__[args.arch](sobel=args.sobel)
    fd = int(model.top_layer.weight.size()[1])
    model.top_layer = None
    model.features = torch.nn.DataParallel(model.features)
    model.cuda()
    cudnn.benchmark = True

    # create optimizer
    optimizer = torch.optim.SGD(
        filter(lambda x: x.requires_grad, model.parameters()),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=10**args.wd,
    )

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # remove top_layer parameters from checkpoint
            # (iterate over a copy of the keys so the dict can be modified)
            for key in list(checkpoint['state_dict']):
                if 'top_layer' in key:
                    del checkpoint['state_dict'][key]
            model.load_state_dict(checkpoint['state_dict'])
            # optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # creating checkpoint repo
    exp_check = os.path.join(args.exp, 'checkpoints')
    if not os.path.isdir(exp_check):
        os.makedirs(exp_check)

    # preprocessing of data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    tra = [transforms.Resize(224), transforms.ToTensor(), normalize]

    # load the data
    end = time.time()
    dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra))
    if args.verbose:
        print('Load dataset: {0:.2f} s'.format(time.time() - end))
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch,
                                             num_workers=args.workers)

    # remove head
    model.top_layer = None
    model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])

    # get the features and dominant angles for the whole dataset
    if args.patch_rotate:
        angles = []
        for file in dataset.imgs:
            filepath = file[0]
            img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (16, 16))
            angles.append(calc_phase(img))

    features = compute_features(dataloader, model, len(dataset))
    small_ft, pca = preprocess_features(features)
    faiss.write_VectorTransform(pca, "gauguin-all.pca")
    # mat = faiss.read_VectorTransform("PCA_128.pca")
    # print(mat)
    small_ft = small_ft.tolist()

    file_name = '/home/visiting/Projects/levishery/deep_cluster/gauguin_all.json'
    with open(file_name, 'w') as file_object:
        json.dump(small_ft, file_object)
    if args.patch_rotate:
        file_name = '/home/visiting/Projects/levishery/deep_cluster/angle_128.json'
        with open(file_name, 'w') as file_object:
            json.dump(angles, file_object)