Пример #1
0
def op_union_indices(op: Callable,
                     a: sparse.csr_matrix,
                     b: sparse.csr_matrix,
                     *,
                     default_value=0):
    """Combine two same-shaped sparse matrices with ``op`` via the CSR kernel.

    The heavy lifting is delegated to ``op_union_indices_csr_csr``, which is
    handed the raw ``indptr`` / ``indices`` / ``data`` arrays of both
    operands. Judging by its name it evaluates ``op`` over the union of the
    stored indices of ``a`` and ``b`` (assumption: confirm against the kernel).

    Args:
        op: Binary callable applied to values of ``a`` and ``b``.
        a: Left operand. Its indices are sorted **in place**.
        b: Right operand. Converted to ``type(a)`` when the types differ;
            its indices are sorted in place as well.
        default_value: Value forwarded to the kernel after being cast to the
            output dtype (presumably the stand-in for entries stored in only
            one of the operands; verify against the kernel).

    Returns:
        A new matrix of ``type(a)`` with shape ``a.shape``, built from
        whatever the kernel returns.

    Raises:
        AssertionError: If the shapes of ``a`` and ``b`` differ.
    """
    assert a.shape == b.shape

    if type(a) != type(b):
        b = type(a)(b)
    a.sort_indices()
    b.sort_indices()

    # Probe the output dtype by applying ``op`` to one value from each side.
    # A matrix without stored entries has an empty ``data`` array, so fall
    # back to a zero scalar of its dtype instead of indexing ``data[0]``
    # (which would raise IndexError).
    a_probe = a.data[0] if a.data.size else a.dtype.type(0)
    b_probe = b.data[0] if b.data.size else b.dtype.type(0)

    # TODO: numpy is weird with bools here
    out_dtype = np.array(op(a_probe, b_probe)).dtype
    default_value = out_dtype.type(default_value)
    return type(a)(
        op_union_indices_csr_csr(
            op,
            a.indptr,
            a.indices,
            a.data,
            b.indptr,
            b.indices,
            b.data,
            out_dtype=out_dtype,
            default_value=default_value,
        ),
        a.shape,
    )
Пример #2
0
def csr_to_rmat(csr: sparse.csr_matrix):
    """Build an R-side ``*gRMatrix`` object from a scipy CSR matrix.

    The matrix indices are sorted in place before conversion. The class name
    prefix and the value converter both come from ``get_type_conv`` for the
    matrix dtype; the third element it returns is not needed here.

    Args:
        csr: Matrix to convert. Mutated by ``sort_indices()``.

    Returns:
        Whatever ``methods.new`` returns for the ``"<prefix>gRMatrix"`` class
        (presumably an R S4 object created through rpy2; confirm against
        the module's imports).
    """
    csr.sort_indices()
    type_prefix, convert_values, _unused = get_type_conv(csr.dtype)

    r_class_name = f"{type_prefix}gRMatrix"
    # Slot values are computed in the same order the constructor lists them.
    slots = dict(
        j=as_integer(csr.indices),
        p=as_integer(csr.indptr),
        x=convert_values(csr.data),
        Dim=as_integer(list(csr.shape)),
    )
    return methods.new(r_class_name, **slots)
Пример #3
0
 def _X_to_df(self, X: sps.csr_matrix, user_ids: List[Any]) -> pd.DataFrame:
     """Convert a sparse user-item matrix into a long-format DataFrame.

     Each non-zero entry of ``X`` becomes one row with the columns
     ``user_id``, ``item_id`` and ``rating``. Row indices are mapped through
     ``user_ids`` and column indices through ``self.item_ids``.

     Args:
         X: Sparse matrix to unpack. Its indices are sorted in place.
         user_ids: Labels indexed by the row positions of ``X``.

     Returns:
         DataFrame with one row per non-zero entry of ``X``.

     Raises:
         RuntimeError: If ``self.item_ids`` is None.
     """
     if self.item_ids is None:
         raise RuntimeError(
             "Setting item_ids is required to use this method.")
     X.sort_indices()
     # Take rows, columns and values from the same COO view and apply one
     # mask to all three. Pairing ``X.nonzero()`` (which skips explicitly
     # stored zeros) with ``X.data`` (which keeps them) produced columns of
     # different lengths whenever X held an explicit zero.
     coo = X.tocoo()
     keep = coo.data != 0
     rows = coo.row[keep]
     cols = coo.col[keep]
     ratings = coo.data[keep]
     return pd.DataFrame(
         dict(
             user_id=[user_ids[r] for r in rows],
             item_id=[self.item_ids[c] for c in cols],
             rating=ratings,
         ))
Пример #4
0
    def layout(self,
               transformed: np.ndarray,
               initial_pos: np.ndarray = None,
               knn: sparse.csr_matrix = None) -> np.ndarray:
        """
        Compute a Barnes-Hut approximate t-SNE layout with the external 'bh_tsne' binary

        The input is serialised to 'data.dat' in a temporary directory, 'bh_tsne'
        is run with that directory as its working directory, and the embedding is
        read back from 'result.dat'.

        Args:
            transformed:    The (typically) PCA-transformed input data, shape: (n_samples, n_components)
            initial_pos:    Initial layout, or None to use the first self.n_dims components of 'transformed'
            knn:            Precomputed knn graph in sparse matrix format, or None to use Gaussian perplexity.
                            Its indices are sorted in place.

        Returns:
            Array with one row of coordinates per sample, ordered by the index
            that bh_tsne reports for each result row.

        Raises:
            RuntimeError:   If 'bh_tsne' exits with a non-zero return code
            OSError:        If 'bh_tsne' cannot be started (e.g. it is not on the $PATH)

        Remarks:
            Requires 'bh_tsne' to be available on the $PATH.
            The output dimensionality, theta, perplexity and iteration count are
            taken from self.n_dims, self.theta, self.perplexity and self.max_iter.
        """
        failure_msg = "TSNE layout failed to execute external binary 'bh_tsne' (check $PATH)"

        n_cells = transformed.shape[0]
        n_components = transformed.shape[1]
        nnz = 0
        if initial_pos is None:
            initial_pos = transformed[:, :self.n_dims]
        if knn is not None:
            # NOTE(review): symmetrising / row-normalising the graph used to be
            # done here but is disabled; presumably callers pass a graph that
            # is already in the form bh_tsne expects. Confirm.
            knn.sort_indices()
            nnz = knn.nnz
        with tempfile.TemporaryDirectory() as td:
            with open(os.path.join(td, 'data.dat'), 'wb') as data_file:
                # Write the bh_tsne header
                data_file.write(
                    pack('=iiiddii', n_cells, n_components, nnz, self.theta,
                         self.perplexity, self.n_dims, self.max_iter))
                # Write the initial positions
                for ix in range(n_cells):
                    pos = initial_pos[ix, :]
                    data_file.write(pack('={}d'.format(pos.shape[0]), *pos))
                # Write the knn graph as its three CSR arrays; skipped when
                # the header announced nnz == 0
                if nnz != 0:
                    data_file.write(
                        pack('={}i'.format(knn.indptr.shape[0]), *knn.indptr))
                    data_file.write(
                        pack('={}i'.format(knn.indices.shape[0]),
                             *knn.indices))
                    data_file.write(
                        pack('={}d'.format(knn.data.shape[0]), *knn.data))
                # Then write the data
                for ix in range(n_cells):
                    sample = transformed[ix, :]
                    data_file.write(
                        pack('={}d'.format(sample.shape[0]), *sample))

            # Call bh_tsne and let it do its thing.
            # NOTE(review): an os.devnull handle used to be opened here but was
            # never passed to Popen, so bh_tsne output is still inherited from
            # this process; the unused handle has been dropped.
            try:
                bh_tsne_p = Popen(("bh_tsne", ), cwd=td)
            except OSError:
                # A missing or non-executable binary fails at launch, before any
                # return code exists; log the same hint and propagate the
                # original exception unchanged.
                logging.error(failure_msg)
                raise
            returncode = bh_tsne_p.wait()
            if returncode != 0:
                logging.error(failure_msg)
                raise RuntimeError(
                    "'bh_tsne' exited with return code {}".format(returncode))

            # Read and pass on the results
            with open(os.path.join(td, 'result.dat'), 'rb') as output_file:
                # The first two integers are just the number of samples and the
                #   dimensionality
                _, n_dims = _read_unpack('ii', output_file)
                # Collect the results, but they may be out of order
                results = [
                    _read_unpack('{}d'.format(n_dims), output_file)
                    for _ in range(n_cells)
                ]
                # Now collect the landmark data so that we can return the data in
                #   the order it arrived
                results = [(_read_unpack('i', output_file), e)
                           for e in results]
                # Put the results in order and return them
                results.sort()
                xy = [result for _, result in results]
                return np.array(xy)