def main(args, _=None):
    """Run ``catalyst-data project-embeddings`` script.

    Reads per-sample metadata from a CSV file and an embedding matrix from a
    ``.npy`` file, optionally subsamples rows and loads images, and writes
    the result with ``SummaryWriter.add_embedding`` into ``args.out_dir``.

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``in_csv``, ``in_npy``, ``out_dir``, ``meta_cols``
            (comma-separated column names), ``num_rows``, ``img_col`` and
            ``img_rootpath``.
        _: unused; accepted so the function can be called with an extra
            positional argument.

    Raises:
        ValueError: if ``args.meta_cols`` is ``None``, or if the CSV and the
            feature matrix do not have the same number of rows.
    """
    # Validate the cheap, purely-argument condition before touching the
    # filesystem, so a bad invocation does not create ``out_dir``.
    if args.meta_cols is None:
        raise ValueError("meta-cols must not be None")
    meta_header = args.meta_cols.split(",")

    df = pd.read_csv(args.in_csv)
    os.makedirs(args.out_dir, exist_ok=True)

    # Memory-map the matrix: only the rows actually selected are read.
    features = np.load(args.in_npy, mmap_mode="r")
    if len(df) != len(features):
        raise ValueError(
            f"row count mismatch: {len(df)} rows in csv, "
            f"{len(features)} rows in features"
        )

    if args.num_rows is not None:
        # Sample without replacement so no point is projected twice; cap the
        # sample size at the number of available rows.
        num_rows = min(args.num_rows, len(df))
        indices = np.random.choice(len(df), num_rows, replace=False)
        # The same positions are applied to both containers to keep
        # embeddings and metadata aligned row by row.
        features = features[indices, :]
        df = df.iloc[indices]

    if args.img_col is not None:
        img_data = _load_image_data(
            rootpath=args.img_rootpath, paths=df[args.img_col].values
        )
    else:
        img_data = None

    # Metadata ends up in a tab-separated file, one row per line, so every
    # whitespace run (newlines, tabs, repeated spaces) is collapsed into a
    # single space and the ends are trimmed.
    metadata = [
        [" ".join(str(text).split()) for text in texts]
        for texts in df[meta_header].values.tolist()
    ]
    if len(metadata) != len(features):
        raise ValueError(
            f"row count mismatch: {len(metadata)} metadata rows, "
            f"{len(features)} feature rows"
        )

    summary_writer = SummaryWriter(args.out_dir)
    try:
        summary_writer.add_embedding(
            features,
            metadata=metadata,
            label_img=img_data,
            metadata_header=meta_header,
        )
    finally:
        # Close the writer even when add_embedding fails.
        summary_writer.close()

    print(
        f"Done. Run `tensorboard --logdir={args.out_dir}` "
        + "to view in Tensorboard"
    )
def main(args, _=None):
    """Run ``catalyst-data project-embeddings`` script.

    Reads per-sample metadata from a CSV file and an embedding matrix from a
    ``.npy`` file, optionally subsamples rows and loads images, and writes
    the result with ``SummaryWriter.add_embedding`` into ``args.out_dir``.

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``in_csv``, ``in_npy``, ``out_dir``, ``meta_cols``
            (comma-separated column names), ``num_rows``, ``img_col``,
            ``img_rootpath`` and ``img_size``.
        _: unused; accepted so the function can be called with an extra
            positional argument.

    Raises:
        ValueError: if ``args.meta_cols`` is ``None``, if the CSV and the
            feature matrix do not have the same number of rows, or if
            ``args.num_rows`` exceeds the number of available rows.
    """
    # Validate the cheap, purely-argument condition before touching the
    # filesystem, so a bad invocation does not create ``out_dir``.
    if args.meta_cols is None:
        raise ValueError("meta-cols must not be None")
    meta_header = args.meta_cols.split(",")

    df = pd.read_csv(args.in_csv)
    os.makedirs(args.out_dir, exist_ok=True)

    # Memory-map the matrix: only the rows actually selected are read.
    features = np.load(args.in_npy, mmap_mode="r")
    if len(df) != len(features):
        raise ValueError(
            f"row count mismatch: {len(df)} rows in csv, "
            f"{len(features)} rows in features"
        )

    if args.num_rows is not None:
        # Draw row positions once (without replacement) and apply them to
        # BOTH the frame and the feature matrix; sampling only the frame
        # would leave embeddings and metadata misaligned.
        positions = np.random.choice(
            len(df), size=args.num_rows, replace=False
        )
        features = features[positions]
        df = df.iloc[positions]

    if args.img_col is not None:
        image_names = [
            path.join(args.img_rootpath, name)
            for name in df[args.img_col].values
        ]
        img_data = np.stack(
            [load_image(name, args.img_size) for name in image_names],
            axis=0,
        )
        # Move the last axis to position 1 and rescale by 1/255.
        # NOTE(review): presumably load_image returns HxWxC uint8 arrays,
        # making this NHWC -> NCHW in [0, 1] — confirm against load_image.
        img_data = (
            img_data.transpose((0, 3, 1, 2)) / 255.0  # noqa: WPS432
        ).astype(np.float32)
        img_data = torch.from_numpy(img_data)
    else:
        img_data = None

    # Hand the writer a plain nested list of strings rather than an ndarray,
    # and collapse whitespace runs so that embedded tabs or newlines cannot
    # break the tab-separated metadata file.
    metadata = [
        [" ".join(str(text).split()) for text in texts]
        for texts in df[meta_header].values.tolist()
    ]

    summary_writer = SummaryWriter(args.out_dir)
    try:
        summary_writer.add_embedding(
            features,
            metadata=metadata,
            label_img=img_data,
            metadata_header=meta_header,
        )
    finally:
        # Close the writer even when add_embedding fails.
        summary_writer.close()

    print(
        f"Done. Run `tensorboard --logdir={args.out_dir}` "
        + "to view in Tensorboard"
    )