def update(self, X, y=None, **fit_params):
    """Add one more dataset to an already fitted aligned model.

    A new ``UMAP`` mapper is fitted on ``X``, appended to ``self.mappers_``,
    and the whole sequence of embeddings is re-optimised with the new
    embedding initialised from the most recent existing one.

    Parameters
    ----------
    X : array-like
        The new dataset; validated with ``check_array``.
    y : ignored
        Accepted for API compatibility; not used by this method.
    **fit_params
        Must contain ``"relations"``: the relations linking the most recent
        dataset and ``X``. May contain ``"window_size"`` (defaults to
        ``self.alignment_window_size``, as in ``fit``). The whole mapping is
        also handed to ``set_aligned_params`` so per-model settings can be
        extended for the new model.

    Raises
    ------
    ValueError
        If ``"relations"`` is not supplied.

    Notes
    -----
    Updates ``n_models_``, ``mappers_``, ``dict_relations_`` and
    ``embeddings_`` in place. Returns ``None``.
    """
    if "relations" not in fit_params:
        raise ValueError(
            "Aligned UMAP requires relations between data to be "
            "specified")

    new_dict_relations = fit_params["relations"]
    X = check_array(X)

    self.__dict__ = set_aligned_params(fit_params, self.__dict__,
                                       self.n_models_)

    # ``fit`` indexes per-model settings with 0 .. n_models_ - 1, so the
    # model being added lives at index ``n_models_`` *before* the counter is
    # incremented. Look the settings up first and only bump the counter once
    # the mapper has been fitted successfully.
    model_index = self.n_models_
    new_mapper = UMAP(
        n_neighbors=get_nth_item_or_val(self.n_neighbors, model_index),
        min_dist=get_nth_item_or_val(self.min_dist, model_index),
        n_epochs=get_nth_item_or_val(self.n_epochs, model_index),
        repulsion_strength=get_nth_item_or_val(self.repulsion_strength,
                                               model_index),
        learning_rate=get_nth_item_or_val(self.learning_rate, model_index),
        spread=get_nth_item_or_val(self.spread, model_index),
        negative_sample_rate=get_nth_item_or_val(self.negative_sample_rate,
                                                 model_index),
        local_connectivity=get_nth_item_or_val(self.local_connectivity,
                                               model_index),
        set_op_mix_ratio=get_nth_item_or_val(self.set_op_mix_ratio,
                                             model_index),
        unique=get_nth_item_or_val(self.unique, model_index),
        # Keep the dimensionality in line with the mappers built by ``fit``.
        n_components=self.n_components,
    ).fit(X)

    self.n_models_ += 1
    self.mappers_ += [new_mapper]

    # TODO: We can likely make this more efficient and not recompute each time
    # NOTE(review): the *inverted* relations are stored while the relations
    # as given seed the new embedding below, whereas ``fit`` stores its
    # relations as given. Left unchanged here -- confirm the direction
    # callers are expected to pass to ``update``.
    self.dict_relations_ += [invert_dict(new_dict_relations)]

    n_epochs = 200 if self.n_epochs is None else self.n_epochs

    indptr_list = numba.typed.List.empty_list(numba.types.int32[::1])
    indices_list = numba.typed.List.empty_list(numba.types.int32[::1])
    heads = numba.typed.List.empty_list(numba.types.int32[::1])
    tails = numba.typed.List.empty_list(numba.types.int32[::1])
    epochs_per_samples = numba.typed.List.empty_list(
        numba.types.float64[::1])

    last_index = len(self.mappers_) - 1
    for i, mapper in enumerate(self.mappers_):
        graph = mapper.graph_
        coo = graph.tocoo()  # convert once instead of once per use
        indptr_list.append(graph.indptr)
        indices_list.append(graph.indices)
        heads.append(coo.row)
        tails.append(coo.col)
        if i == last_index:
            # Only the newly added model gets a real sampling schedule.
            epochs_per_samples.append(
                make_epochs_per_sample(coo.data, n_epochs))
        else:
            # Existing models get a constant ``n_epochs + 1`` -- presumably
            # so their own edges are never sampled during this run.
            epochs_per_samples.append(
                np.full(mapper.embedding_.shape[0], n_epochs + 1,
                        dtype=np.float64))

    # Use the same window size rule as ``fit`` so both code paths build
    # relation arrays of matching width.
    window_size = fit_params.get("window_size", self.alignment_window_size)
    new_relations = expand_relations(self.dict_relations_, window_size)
    new_regularisation_weights = build_neighborhood_similarities(
        indptr_list,
        indices_list,
        new_relations,
    )

    new_embedding = init_from_existing(self.embeddings_[-1],
                                       new_mapper.graph_,
                                       new_dict_relations)

    random_state = check_random_state(self.random_state)
    rng_state = random_state.randint(INT32_MIN, INT32_MAX,
                                     3).astype(np.int64)

    self.embeddings_.append(new_embedding)
    self.embeddings_ = optimize_layout_aligned_euclidean(
        self.embeddings_,
        self.embeddings_,
        heads,
        tails,
        n_epochs,
        epochs_per_samples,
        new_regularisation_weights,
        new_relations,
        rng_state,
        lambda_=self.alignment_regularisation,
    )
def fit(self, X, y=None, **fit_params):
    """Fit one UMAP model per dataset and jointly align their embeddings.

    Parameters
    ----------
    X : list, tuple or numpy.ndarray
        Sequence of datasets; ``X[n]`` is fitted by the n-th mapper.
    y : ignored
        Accepted for API compatibility; not used by this method.
    **fit_params
        Must contain ``"relations"``: a list or tuple with exactly
        ``len(X) - 1`` entries, one per consecutive pair of datasets.
        May contain ``"window_size"`` (defaults to
        ``self.alignment_window_size``).

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``"relations"`` is missing, or if ``n_components`` is a
        list/tuple/array rather than a single value.
    AssertionError
        If ``relations`` or ``X`` has the wrong container type, or their
        lengths do not match.

    Notes
    -----
    Sets ``dict_relations_``, ``n_models_``, ``mappers_`` and
    ``embeddings_``.
    """
    if "relations" not in fit_params:
        raise ValueError(
            "Aligned UMAP requires relations between data to be "
            "specified")

    self.dict_relations_ = fit_params["relations"]
    # Kept as assertions so the exception type seen by callers is unchanged.
    assert isinstance(self.dict_relations_, (list, tuple))
    assert isinstance(X, (list, tuple, np.ndarray))
    assert (len(X) - 1) == (len(self.dict_relations_))

    # We need n_components to be constant or this won't work
    if isinstance(self.n_components, (list, tuple, np.ndarray)):
        raise ValueError(
            "n_components must be a single integer, and cannot vary")

    self.n_models_ = len(X)

    self.mappers_ = [
        UMAP(
            n_neighbors=get_nth_item_or_val(self.n_neighbors, n),
            min_dist=get_nth_item_or_val(self.min_dist, n),
            n_epochs=get_nth_item_or_val(self.n_epochs, n),
            repulsion_strength=get_nth_item_or_val(self.repulsion_strength,
                                                   n),
            learning_rate=get_nth_item_or_val(self.learning_rate, n),
            spread=get_nth_item_or_val(self.spread, n),
            negative_sample_rate=get_nth_item_or_val(
                self.negative_sample_rate, n),
            local_connectivity=get_nth_item_or_val(self.local_connectivity,
                                                   n),
            set_op_mix_ratio=get_nth_item_or_val(self.set_op_mix_ratio, n),
            unique=get_nth_item_or_val(self.unique, n),
            n_components=self.n_components,
        ).fit(X[n]) for n in range(self.n_models_)
    ]

    n_epochs = 200 if self.n_epochs is None else self.n_epochs

    window_size = fit_params.get("window_size", self.alignment_window_size)
    relations = expand_relations(self.dict_relations_, window_size)

    indptr_list = numba.typed.List.empty_list(numba.types.int32[::1])
    indices_list = numba.typed.List.empty_list(numba.types.int32[::1])
    heads = numba.typed.List.empty_list(numba.types.int32[::1])
    tails = numba.typed.List.empty_list(numba.types.int32[::1])
    epochs_per_samples = numba.typed.List.empty_list(
        numba.types.float64[::1])

    for mapper in self.mappers_:
        graph = mapper.graph_
        coo = graph.tocoo()  # convert once instead of once per use
        indptr_list.append(graph.indptr)
        indices_list.append(graph.indices)
        heads.append(coo.row)
        tails.append(coo.col)
        epochs_per_samples.append(
            make_epochs_per_sample(coo.data, n_epochs))

    regularisation_weights = build_neighborhood_similarities(
        indptr_list,
        indices_list,
        relations,
    )

    # Draw the optimiser seed before anything else consumes the generator,
    # so it is the same value a fresh ``check_random_state`` would yield.
    random_state = check_random_state(self.random_state)
    rng_state = random_state.randint(INT32_MIN, INT32_MAX,
                                     3).astype(np.int64)

    def _scaled_spectral_init(mapper):
        # Spectral layout of one mapper, rescaled so the largest absolute
        # coordinate is 10 and stored as C-ordered float32.
        # The estimator's own random state is used (rather than the global
        # ``np.random`` module) so a fixed ``random_state`` also governs
        # any randomness inside the spectral initialisation.
        layout = spectral_layout(
            mapper._raw_data,
            mapper.graph_,
            self.n_components,
            random_state,
        )
        expansion = 10.0 / np.abs(layout).max()
        return (layout * expansion).astype(
            np.float32,
            order="C",
        )

    embeddings = numba.typed.List.empty_list(numba.types.float32[:, ::1])
    embeddings.append(_scaled_spectral_init(self.mappers_[0]))

    for i in range(1, self.n_models_):
        next_embedding = _scaled_spectral_init(self.mappers_[i])

        # Entries >= 0 in this relation row are treated as anchor pairs:
        # the stored value indexes the previous embedding and the position
        # indexes the current one.
        anchor_data = relations[i][window_size - 1]
        has_anchor = anchor_data >= 0
        left_anchors = anchor_data[has_anchor]
        right_anchors = np.where(has_anchor)[0]
        embeddings.append(
            procrustes_align(
                embeddings[-1],
                next_embedding,
                np.vstack([left_anchors, right_anchors]),
            ))

    self.embeddings_ = optimize_layout_aligned_euclidean(
        embeddings,
        embeddings,
        heads,
        tails,
        n_epochs,
        epochs_per_samples,
        regularisation_weights,
        relations,
        rng_state,
        lambda_=self.alignment_regularisation,
    )

    return self