def use_gpu(gpu_id):
    """Activate GPU `gpu_id` and switch thinc's global Model ops to CupyOps.

    Returns the active cupy Device on success, or None when cupy is not
    installed (CPU-only environments).
    """
    try:
        import cupy.cuda.device
    except ImportError:
        # No cupy available: leave Model on its current (CPU) ops.
        return None
    from thinc.neural.ops import CupyOps

    gpu_device = cupy.cuda.device.Device(gpu_id)
    gpu_device.use()
    Model.ops = CupyOps()
    Model.Ops = CupyOps
    return gpu_device
def set_active_gpu(gpu_id):
    """Make `gpu_id` the current CUDA device for cupy, and for torch when
    torch is installed. Returns the cupy Device object.
    """
    import cupy.cuda.device

    gpu_device = cupy.cuda.device.Device(gpu_id)
    gpu_device.use()
    # Best-effort: mirror the device selection into torch if it is present.
    try:
        import torch

        torch.cuda.set_device(gpu_id)
        torch.set_default_tensor_type("torch.cuda.FloatTensor")
    except ImportError:
        pass
    return gpu_device
def set_active_gpu(gpu_id: int) -> "cupy.cuda.Device":  # pragma: no cover
    """Set the current GPU device for cupy and torch (if available)."""
    import cupy.cuda.device

    active = cupy.cuda.device.Device(gpu_id)
    active.use()
    # If torch is importable, keep its notion of the current device (and its
    # default tensor type) in sync with cupy's.
    try:
        import torch

        torch.cuda.set_device(gpu_id)
        torch.set_default_tensor_type("torch.cuda.FloatTensor")
    except ImportError:
        pass
    return active
def to_gpu(self, device_num):
    """Move this model and every nested sublayer onto GPU `device_num`.

    Walks the layer tree breadth-first, switching each layer to CupyOps and
    copying its parameter memory to the device. Returns the cupy Device.
    """
    import cupy.cuda.device

    device = cupy.cuda.device.Device(device_num)
    device.use()
    pending = [self]
    while pending:
        layer = pending.pop(0)  # FIFO: same breadth-first order as before
        layer.ops = CupyOps()
        layer.Ops = CupyOps
        if hasattr(layer, '_mem'):
            # Re-materialize the parameter buffer on the GPU.
            layer._mem._mem = self.ops.xp.asarray(layer._mem._mem)
            layer._mem.ops = layer.ops
        if hasattr(layer, '_layers'):
            pending.extend(layer._layers)
    return device
def to_gpu(self, device_num):
    """Transfer this model and all nested sublayers to GPU `device_num`,
    returning the cupy Device that was activated.
    """
    import cupy.cuda.device

    device = cupy.cuda.device.Device(device_num)
    device.use()
    # Walk the layer tree in breadth-first order via an index cursor.
    worklist = [self]
    idx = 0
    while idx < len(worklist):
        layer = worklist[idx]
        idx += 1
        layer.ops = CupyOps()
        layer.Ops = CupyOps
        if hasattr(layer, u'_mem'):
            # Copy the layer's parameter memory onto the device.
            layer._mem._mem = self.ops.xp.asarray(layer._mem._mem)
            layer._mem.ops = layer.ops
        if hasattr(layer, u'_layers'):
            worklist.extend(layer._layers)
    return device
def to_gpu(self, gpu_id: int) -> None:  # pragma: no cover
    """Transfer the model to a given GPU device.

    gpu_id (int): Index of the CUDA device to move the model's weights to.
    """
    import cupy.cuda.device

    device = cupy.cuda.device.Device(gpu_id)
    # NOTE(review): this relies on Device.use() returning the Device itself so
    # it can serve as a context manager; older cupy versions returned None,
    # which would make this `with` raise — confirm against the pinned cupy
    # version. Also note the `with` restores the previous device on exit.
    with device.use():
        self._to_ops(CupyOps())
def main(vectors, gpu_id=-1, n_neighbors=100, batch_size=1024, cutoff=0, start=0, end=None):
    """
    Step 6: Precompute nearest-neighbor queries (optional)

    Precompute nearest-neighbor queries for every entry in the vocab to make
    Sense2Vec.most_similar faster. The --cutoff option lets you define the
    number of earliest rows to limit the neighbors to. For instance, if cutoff
    is 100000, no word will have a nearest neighbor outside of the top 100k
    vectors.

    vectors: Path to the exported sense2vec directory containing a "vectors" file.
    gpu_id (int): CUDA device to use, or -1 for CPU (numpy).
    n_neighbors (int): Neighbors to precompute per row.
    batch_size (int): Rows per similarity batch.
    cutoff (int): Restrict neighbor candidates to the first `cutoff` rows
        (<1 means no restriction).
    start / end (int): Row range [start, end) to compute queries for.
    """
    if gpu_id == -1:
        xp = numpy
    else:
        import cupy as xp
        import cupy.cuda.device

        # Patch in take_along_axis for cupy versions that lack it.
        cupy.take_along_axis = take_along_axis
        device = cupy.cuda.device.Device(gpu_id)
        device.use()
    vectors_dir = Path(vectors)
    vectors_file = vectors_dir / "vectors"
    if not vectors_dir.is_dir() or not vectors_file.exists():
        err = "Are you passing in the exported sense2vec directory containing a vectors file?"
        msg.fail(f"Can't load vectors from {vectors}", err, exits=1)
    with msg.loading(f"Loading vectors from {vectors}"):
        vectors = xp.load(str(vectors_file))
    msg.good(
        f"Loaded {vectors.shape[0]:,} vectors with dimension {vectors.shape[1]}"
    )
    norms = xp.linalg.norm(vectors, axis=1, keepdims=True)
    norms[norms == 0] = 1  # avoid division by zero for all-zero rows
    # Normalize to unit norm
    vectors /= norms
    if cutoff < 1:
        cutoff = vectors.shape[0]
    if end is None:
        end = vectors.shape[0]
    mean = float(norms.mean())
    var = float(norms.var())
    msg.good(f"Normalized (mean {mean:,.2f}, variance {var:,.2f})")
    msg.info(
        f"Finding {n_neighbors:,} neighbors among {cutoff:,} most frequent")
    n = min(n_neighbors, vectors.shape[0])
    subset = vectors[:cutoff]
    # Output arrays hold one row per entry in [start, end).
    best_rows = xp.zeros((end - start, n), dtype="i")
    scores = xp.zeros((end - start, n), dtype="f")
    for i in tqdm.tqdm(list(range(start, end, batch_size))):
        size = min(batch_size, end - i)
        batch = vectors[i:i + size]
        sims = xp.dot(batch, subset.T)
        # Set self-similarities to -inf, so that we don't return them.
        for j in range(size):
            if i + j < sims.shape[1]:
                sims[j, i + j] = -xp.inf
        # This used to use argpartition, to do a partial sort...But this ended
        # up being a ratsnest of terrible numpy crap. Just sorting the whole
        # list isn't really slower, and it's much simpler to read.
        ranks = xp.argsort(sims, axis=1)
        batch_rows = ranks[:, -n:]
        # Reverse
        batch_rows = batch_rows[:, ::-1]
        batch_scores = xp.take_along_axis(sims, batch_rows, axis=1)
        # FIX: the output arrays have end - start rows, so the batch must be
        # written at offset i - start. The previous `best_rows[i:i + size]`
        # mis-placed rows whenever start > 0 and silently clipped the final
        # batches (slice assignment past the end is a no-op in numpy/cupy).
        offset = i - start
        best_rows[offset:offset + size] = batch_rows
        scores[offset:offset + size] = batch_scores
    msg.info("Saving output")
    # Pull results back to host memory when computed on GPU.
    if not isinstance(best_rows, numpy.ndarray):
        best_rows = best_rows.get()
    if not isinstance(scores, numpy.ndarray):
        scores = scores.get()
    output = {
        "indices": best_rows,
        "scores": scores.astype("float16"),
        "start": start,
        "end": end,
        "cutoff": cutoff,
    }
    output_file = vectors_dir / "cache"
    with msg.loading("Saving output..."):
        srsly.write_msgpack(output_file, output)
    msg.good(f"Saved cache to {output_file}")