示例#1
0
文件: snmf.py 项目: tmoerman/csnmf
def compute_multiple(data, ncols, alg, compress=False, n_power_iter=0, step=1):
    """
    Compute separable NMF of the input matrix with k columns, for
    k = step, 2 * step, ... up to and including ncols (k = 1 to ncols
    with the default step).
    :param data:  Input matrix
    :type data: numpy.ndarray or da.Array
    :param ncols:  Maximum number of columns to select
    :type ncols: int
    :param alg: Choice of algorithm for computing the columns.
    One of 'SPA' or 'XRAY'.
    :type alg: basestring
    :param compress: Whether to use compression or not
    :type compress: bool
    :param n_power_iter: Number of power iterations for compression
    :type n_power_iter: int
    :param step: Step size for k
    :type step: int
    :return:
    The selected columns, the right factors of the separable NMF
    decomposition, and the relative errors, one entry per value of k.
    :rtype: list of tuples of:
     - list of ints
     - right factor matrix
     - relative error of the approximation
    :raises TypeError: if compress is false and data is neither a
    da.Array nor a numpy.ndarray
    """
    if compress:
        data_comp, _ = csnmf.compression.compress(data, ncols, n_power_iter)
    elif isinstance(data, da.Array):
        _, data_comp = csnmf.tsqr.qr(data)
    elif isinstance(data, np.ndarray):
        _, data_comp = np.linalg.qr(data)
    else:
        raise TypeError('Cannot compute QR decomposition of matrices '
                        'of type ' + type(data).__name__)

    # The column norms are computed on the reduced matrix before it is
    # converted to an in-memory numpy array, as in the original ordering.
    colnorms = _compute_colnorms(data_comp)

    data_comp = np.array(data_comp)
    colnorms = np.array(colnorms)

    results = []
    # ncols + 1 makes the upper bound inclusive, so that ncols itself (the
    # documented maximum) is reached whenever it is a multiple of step.
    for k in range(step, ncols + 1, step):
        cols = mrnmf.select_columns(data_comp, colnorms, alg, k)
        mat_h, error = mrnmf.nnls_frob(data_comp, cols)
        results.append((cols, mat_h, error))

    return results
示例#2
0
def compute(data, ncols, alg, compress=False, n_power_iter=0):
    """
    Run separable NMF on the input matrix and select ``ncols`` columns.

    The input is first reduced, either through
    ``csnmf.compression.compress`` (called with ``our=True``) or through a
    QR decomposition whose second factor is kept; column selection and the
    non-negative least squares fit are then run on the reduced matrix.

    :param data:  Input matrix
    :type data: numpy.ndarray or da.Array
    :param ncols:  Number of columns to select
    :type ncols: int
    :param alg: Algorithm used to pick the columns.
    One of 'SPA' or 'XRAY'.
    :type alg: basestring
    :param compress: Use compression instead of a QR decomposition
    :type compress: bool
    :param n_power_iter: Number of power iterations for compression
    :type n_power_iter: int
    :return:
    The selected columns, the right factor of the separable NMF
    decomposition, and the relative error.
    :rtype: tuple of:
     - list of ints
     - right factor matrix
     - relative error of the approximation
    :raises TypeError: if ``compress`` is false and ``data`` is neither a
    da.Array nor a numpy.ndarray
    """
    if compress:
        reduced, _ = csnmf.compression.compress(data, ncols, n_power_iter,
                                                our=True)
    elif isinstance(data, da.Array):
        _, reduced = csnmf.tsqr.qr(data)
    elif isinstance(data, np.ndarray):
        _, reduced = np.linalg.qr(data)
    else:
        raise TypeError('Cannot compute QR decomposition of matrices '
                        'of type ' + type(data).__name__)
    reduced = np.array(reduced)

    # Column norms are only computed for SPA; other algorithms get None.
    colnorms = np.array(_compute_colnorms(reduced)) if alg == 'SPA' else None

    cols = mrnmf.select_columns(reduced, alg, ncols, colnorms=colnorms)
    mat_h, error = mrnmf.nnls_frob(reduced, cols)
    return cols, mat_h, error
示例#3
0
文件: snmf.py 项目: tmoerman/csnmf
def compute(data, ncols, alg, compress=False, n_power_iter=0):
    """
    Select ``ncols`` columns of the input matrix by separable NMF.

    Works on a reduced version of ``data``: the first output of
    ``csnmf.compression.compress`` when ``compress`` is true, otherwise the
    second factor of a QR decomposition.

    :param data:  Input matrix
    :type data: numpy.ndarray or da.Array
    :param ncols:  Number of columns to select
    :type ncols: int
    :param alg: Column selection algorithm.
    One of 'SPA' or 'XRAY'.
    :type alg: basestring
    :param compress: Whether to use compression or not
    :type compress: bool
    :param n_power_iter: Number of power iterations for compression
    :type n_power_iter: int
    :return:
    The selected columns, the right factor of the separable NMF
    decomposition, and the relative error.
    :rtype: tuple of:
     - list of ints
     - right factor matrix
     - relative error of the approximation
    :raises TypeError: if ``compress`` is false and ``data`` is neither a
    da.Array nor a numpy.ndarray
    """
    if compress:
        small, _ = csnmf.compression.compress(data, ncols, n_power_iter)
    elif isinstance(data, da.Array):
        _, small = csnmf.tsqr.qr(data)
    elif isinstance(data, np.ndarray):
        _, small = np.linalg.qr(data)
    else:
        raise TypeError('Cannot compute QR decomposition of matrices '
                        'of type ' + type(data).__name__)
    small = np.array(small)

    # Only the SPA branch computes column norms.
    colnorms = None
    if alg == 'SPA':
        colnorms = np.array(_compute_colnorms(small))

    selected = mrnmf.select_columns(small, alg, ncols, colnorms=colnorms)
    mat_h, error = mrnmf.nnls_frob(small, selected)
    return selected, mat_h, error
示例#4
0
def test_movie(hdf_filename, base_output_name, ncols=None, interval=None,
               max_blockshape=(1e5, 100)):
    """
    Run compressed SPA separable NMF on a movie stored in an HDF5 file and
    write the selected columns (and optional visualisations) to disk.

    The file must contain an ``img_shape`` dataset and a ``data`` dataset.
    Every column of ``data`` is reshaped to ``img_shape`` and handled as
    one image.

    :param hdf_filename: Path of the HDF5 file to read
    :param base_output_name: Prefix of every output file name
    :param ncols: Number of columns to select. Defaults to the number of
    columns of the data divided by 120 (integer division).
    :type ncols: int
    :param interval: Optional pair (start, stop) of column indices; when
    given, only columns start:stop are used and loaded into memory.
    :param max_blockshape: Upper bounds on the chunk shape used to read
    the data
    :return: None. Writes ``<base>_representative_<i>.png`` for every
    selected column. When ``interval`` is given and ``ncols <= 10`` it
    also writes ``<base>.avi``, ``<base>_representatives.pdf`` and
    ``<base>_activation.pdf``.

    NOTE(review): ``cv2.cv.*`` and the ``axisbg`` keyword look tied to
    older OpenCV / matplotlib releases; they are left as in the original.
    """
    # The dask array reads lazily from the file, so the file is kept open
    # until the data has been materialised, then closed even on error.
    with h5py.File(hdf_filename, 'r') as f:
        # Plain int replaces the np.int alias, which newer NumPy removed.
        img_shape = np.array(f['img_shape'], dtype=int)

        m = min(max_blockshape[0], reduce(mul, img_shape))
        if interval is not None:
            n = min(max_blockshape[1], -reduce(sub, interval))
        else:
            n = max_blockshape[1]
        m = int(m)
        n = int(n)
        data = da.from_array(f['data'], chunks=(m, n))
        if interval is not None:
            data = data[:, interval[0]:interval[1]]
            data = np.array(data.compute())

        if ncols is None:
            # Floor division keeps ncols an int on Python 2 and 3 alike;
            # it is later used in range() and as a subplot count.
            ncols = data.shape[1] // 120

        print(data.shape, ncols, m, n)

        t = timeit.default_timer()
        cols, mat_h, error = csnmf.snmf.compute(data, ncols, 'SPA',
                                                compress=True)
        t = timeit.default_timer() - t
        print(error)

        data = np.array(data)

    # Recompute the error against the full data rather than the compressed
    # matrix used inside compute().
    error = mrnmf.nnls_frob(data, cols)[1]

    def argsort(seq):
        return sorted(range(len(seq)), key=seq.__getitem__)

    # Sort the selected columns and reorder the rows of mat_h to match.
    cols_order = argsort(cols)
    cols = sorted(cols)
    mat_h = mat_h[cols_order, :]

    res_dict = {'cols': cols, 'error': error, 'time': t}
    base_str = 'error {error:.4f}; time {time:.2f}; cols {cols}'
    print(base_str.format(**res_dict))

    if interval is not None and ncols <= 10:

        colors = ['#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99',
                 '#e31a1c', '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a']
        cmap = ListedColormap(colors)

        fourcc = cv2.cv.CV_FOURCC(*'mp4v')
        out = cv2.VideoWriter(base_output_name + '.avi',
                              fourcc, 8.0, (img_shape[1], img_shape[0]), True)
        # Row of mat_h with the largest coefficient for each data column.
        max_val = np.argmax(mat_h, axis=0)
        for i in range(data.shape[1]):
            img = np.reshape(data[:, i], img_shape) * 255
            img = img.astype(np.uint8)
            norm_idx = float(max_val[i]) / ncols
            # A list comprehension (not map) so the reversal also works
            # where map() returns an iterator.
            c = [int(x * 255) for x in cmap(norm_idx)][::-1]
            cv2.rectangle(img, (img_shape[1]-50, img_shape[0]-50),
                          (img_shape[1], img_shape[0]), c, cv2.cv.CV_FILLED)
            out.write(img)
        out.release()

        border_width = 40
        arrangement = int(math.ceil(math.sqrt(ncols)))
        plt.figure()
        for i, c in enumerate(cols):
            img = np.reshape(data[:, c], img_shape)
            norm_idx = float(i) / ncols
            ax = plt.subplot(arrangement, arrangement, i+1,
                             axisbg=cmap(norm_idx))
            ax.imshow(img, aspect='equal', origin='lower',
                      extent=(border_width, img_shape[1] - border_width,
                              border_width, img_shape[0] - border_width))
            ax.imshow(img, alpha=0)
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)

        plt.tight_layout()
        plt.savefig(base_output_name + '_representatives.pdf', dpi=300)

        # Scale every column of mat_h to sum to one for the stacked bars.
        mat_h_norm = mat_h / np.sum(mat_h, axis=0)
        plt.figure()
        ax = plt.axes()
        for i in range(ncols):
            bottom = np.sum(mat_h_norm[:i, :], axis=0)
            norm_idx = float(i) / ncols
            ax.bar(range(data.shape[1]), mat_h_norm[i, :],  1,
                   color=cmap(norm_idx),
                   linewidth=0, bottom=bottom)
        ax.set_ylim(0, 1)

        plt.savefig(base_output_name + '_activation.pdf', dpi=300)

    for i, c in enumerate(cols):
        img = np.reshape(data[:, c], img_shape)
        plt.figure()
        ax = plt.axes()
        ax.imshow(img)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        plt.savefig(base_output_name + '_representative_{0}.png'.format(i))
        plt.close()

    plt.close('all')
示例#5
0
def test_movie(hdf_filename, base_output_name, ncols=None, interval=None,
               max_blockshape=(1e5, 100)):
    """
    Run compressed SPA separable NMF on a movie stored in an HDF5 file and
    write the selected columns (and optional visualisations) to disk.

    ``img_shape`` is read directly from the file; the ``/data`` dataset is
    loaded through ``into`` with a bounded block shape. Every column of the
    data is reshaped to ``img_shape`` and handled as one image.

    :param hdf_filename: Path of the HDF5 file to read
    :param base_output_name: Prefix of every output file name
    :param ncols: Number of columns to select. Defaults to the number of
    columns of the data divided by 120 (integer division).
    :type ncols: int
    :param interval: Optional pair (start, stop) of column indices; when
    given, only columns start:stop are used and loaded into memory.
    :param max_blockshape: Upper bounds on the block shape used to read
    the data
    :return: None. Writes ``<base>_representative_<i>.png`` for every
    selected column. When ``interval`` is given and ``ncols <= 10`` it
    also writes ``<base>.avi``, ``<base>_representatives.pdf`` and
    ``<base>_activation.pdf``.

    NOTE(review): ``cv2.cv.*`` and the ``axisbg`` keyword look tied to
    older OpenCV / matplotlib releases; they are left as in the original.
    """
    # The context manager closes the file even if reading img_shape fails.
    with h5py.File(hdf_filename, 'r') as f:
        # Plain int replaces the np.int alias, which newer NumPy removed.
        img_shape = np.array(f['img_shape'], dtype=int)

    m = min(max_blockshape[0], reduce(mul, img_shape))
    if interval is not None:
        n = min(max_blockshape[1], -reduce(sub, interval))
    else:
        n = max_blockshape[1]
    m = int(m)
    n = int(n)
    data = into(Array, hdf_filename + '::/data', blockshape=(m, n))
    if interval is not None:
        data = data[:, interval[0]:interval[1]]
        data = np.array(data)

    if ncols is None:
        # Floor division keeps ncols an int on Python 2 and 3 alike; it is
        # later used in range() and as a subplot count.
        ncols = data.shape[1] // 120

    print(data.shape, ncols, m, n)

    t = timeit.default_timer()
    cols, mat_h, error = csnmf.snmf.compute(data, ncols, 'SPA', compress=True)
    t = timeit.default_timer() - t
    print(error)

    # Recompute the error against the full in-memory data rather than the
    # compressed matrix used inside compute().
    data = np.array(data)
    error = mrnmf.nnls_frob(data, cols)[1]

    def argsort(seq):
        return sorted(range(len(seq)), key=seq.__getitem__)

    # Sort the selected columns and reorder the rows of mat_h to match.
    cols_order = argsort(cols)
    cols = sorted(cols)
    mat_h = mat_h[cols_order, :]

    res_dict = {'cols': cols, 'error': error, 'time': t}
    base_str = 'error {error:.4f}; time {time:.2f}; cols {cols}'
    print(base_str.format(**res_dict))

    if interval is not None and ncols <= 10:

        colors = ['#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99',
                 '#e31a1c', '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a']
        cmap = ListedColormap(colors)

        fourcc = cv2.cv.CV_FOURCC(*'mp4v')
        out = cv2.VideoWriter(base_output_name + '.avi',
                              fourcc, 8.0, (img_shape[1], img_shape[0]), True)
        # Row of mat_h with the largest coefficient for each data column.
        max_val = np.argmax(mat_h, axis=0)
        for i in range(data.shape[1]):
            img = np.reshape(data[:, i], img_shape) * 255
            img = img.astype(np.uint8)
            norm_idx = float(max_val[i]) / ncols
            # A list comprehension (not map) so the reversal also works
            # where map() returns an iterator.
            c = [int(x * 255) for x in cmap(norm_idx)][::-1]
            cv2.rectangle(img, (img_shape[1]-50, img_shape[0]-50),
                          (img_shape[1], img_shape[0]), c, cv2.cv.CV_FILLED)
            out.write(img)
        out.release()

        border_width = 40
        arrangement = int(math.ceil(math.sqrt(ncols)))
        plt.figure()
        for i, c in enumerate(cols):
            img = np.reshape(data[:, c], img_shape)
            norm_idx = float(i) / ncols
            ax = plt.subplot(arrangement, arrangement, i+1,
                             axisbg=cmap(norm_idx))
            ax.imshow(img, aspect='equal', origin='lower',
                      extent=(border_width, img_shape[1] - border_width,
                              border_width, img_shape[0] - border_width))
            ax.imshow(img, alpha=0)
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)

        plt.tight_layout()
        plt.savefig(base_output_name + '_representatives.pdf', dpi=300)

        # Scale every column of mat_h to sum to one for the stacked bars.
        mat_h_norm = mat_h / np.sum(mat_h, axis=0)
        plt.figure()
        ax = plt.axes()
        for i in range(ncols):
            bottom = np.sum(mat_h_norm[:i, :], axis=0)
            norm_idx = float(i) / ncols
            ax.bar(range(data.shape[1]), mat_h_norm[i, :],  1,
                   color=cmap(norm_idx),
                   linewidth=0, bottom=bottom)
        ax.set_ylim(0, 1)

        plt.savefig(base_output_name + '_activation.pdf', dpi=300)

    for i, c in enumerate(cols):
        img = np.reshape(data[:, c], img_shape)
        plt.figure()
        ax = plt.axes()
        ax.imshow(img)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        plt.savefig(base_output_name + '_representative_{0}.png'.format(i))
        plt.close()

    plt.close('all')