Exemplo n.º 1
0
def pack(data, ind=None, dims=None, sorting=False, axes=None):
    """Pack an RDD into a dense local array, with options for
    sorting, reshaping, and projecting based on keys

    Parameters
    ----------
    data : RDD of (tuple, array) pairs
        The data to pack into a local array

    ind : int, optional, default = None
        An index, if each record has multiple entries

    dims : Dimensions, optional, default = None
        Dimensions of the keys, for use with sorting and reshaping.
        Computed with ``getdims(data)`` when omitted. The object passed
        in is never modified.

    sorting : Boolean, optional, default = False
        Whether to sort the RDD before packing

    axes : int, optional, default = None
        Which axis to do maximum projection along

    Returns
    -------
    result : array
        A local numpy array with the RDD contents (values are cast
        to float16)

    Raises
    ------
    IndexError
        If ``axes`` does not refer to one of the key dimensions

    """
    from copy import copy

    if dims is None:
        dims = getdims(data)

    if axes is not None:
        nkeys = len(data.first()[0])
        # an out-of-range axis would otherwise leave every key untouched
        # and silently return an unprojected array
        if not 0 <= axes < nkeys:
            raise IndexError(
                'only %g keys, cannot compute maximum along axis %g' %
                (nkeys, axes))
        # boolean mask selecting every key dimension except the projected one
        keep = arange(0, nkeys) != axes
        data = data.map(
            lambda kv: (tuple(array(kv[0])[keep]), kv[1])).reduceByKey(maximum)
        # work on a copy so the caller's Dimensions object keeps all its axes
        dims = copy(dims)
        dims.min = list(array(dims.min)[keep])
        dims.max = list(array(dims.max)[keep])
        sorting = True  # will always need to sort because reduceByKey changes order

    # index-based lambdas: tuple parameters are a syntax error on Python 3
    if ind is None:
        result = data.map(lambda kv: float16(kv[1])).collect()
        nout = size(result[0])
    else:
        result = data.map(lambda kv: float16(kv[1][ind])).collect()
        nout = size(ind)

    if sorting:
        data = subtoind(data, dims.max)
        keys = data.map(lambda kv: int(kv[0])).collect()
        # sort on the linear index only, so values are never compared
        ordered = sorted(zip(keys, result), key=lambda kv: kv[0])
        result = array([v for (_, v) in ordered])

    return squeeze(transpose(reshape(result, ((nout, ) + dims.count())[::-1])))
Exemplo n.º 2
0
def pack(data, ind=None, dims=None, sorting=False, axes=None):
    """Pack an RDD into a dense local array, with options for
    sorting, reshaping, and projecting based on keys

    Parameters
    ----------
    data : RDD of (tuple, array) pairs
        The data to pack into a local array

    ind : int, optional, default = None
        An index, if each record has multiple entries

    dims : Dimensions, optional, default = None
        Dimensions of the keys, for use with sorting and reshaping.
        Computed with ``getdims(data)`` when omitted. The object passed
        in is never modified.

    sorting : Boolean, optional, default = False
        Whether to sort the RDD before packing

    axes : int, optional, default = None
        Which axis to do maximum projection along

    Returns
    -------
    result : array
        A local numpy array with the RDD contents (values are cast
        to float16)

    Raises
    ------
    IndexError
        If ``axes`` does not refer to one of the key dimensions

    """
    from copy import copy

    if dims is None:
        dims = getdims(data)

    if axes is not None:
        nkeys = len(data.first()[0])
        # without this check an out-of-range axis drops no key dimension
        # and the caller silently gets an unprojected array back
        if not 0 <= axes < nkeys:
            raise IndexError('only %g keys, cannot compute maximum along axis %g' % (nkeys, axes))
        # True for every key dimension that survives the projection
        keep = arange(0, nkeys) != axes
        data = data.map(lambda kv: (tuple(array(kv[0])[keep]), kv[1])).reduceByKey(maximum)
        # copy first: the caller's Dimensions object must keep all of its axes
        dims = copy(dims)
        dims.min = list(array(dims.min)[keep])
        dims.max = list(array(dims.max)[keep])
        sorting = True  # will always need to sort because reduceByKey changes order

    # records are indexed rather than unpacked in the lambda signature,
    # which is not valid syntax on Python 3
    if ind is None:
        result = data.map(lambda kv: float16(kv[1])).collect()
        nout = size(result[0])
    else:
        result = data.map(lambda kv: float16(kv[1][ind])).collect()
        nout = size(ind)

    if sorting:
        data = subtoind(data, dims.max)
        keys = data.map(lambda kv: int(kv[0])).collect()
        # order by linear index alone; the values themselves are never compared
        ordered = sorted(zip(keys, result), key=lambda kv: kv[0])
        result = array([v for (_, v) in ordered])

    return squeeze(transpose(reshape(result, ((nout,) + dims.count())[::-1])))
Exemplo n.º 3
0
def pack(data, ind=None, dims=None, sorting=False, axis=None):
    """Pack an RDD into a dense local array, with options for
    sorting, reshaping, and projecting based on keys

    Parameters
    ----------
    data : RDD of (tuple, array) pairs
        The data to pack into a local array

    ind : int, optional, default = None
        An index, if each record has multiple entries

    dims : Dimensions, optional, default = None
        Dimensions of the keys, for use with sorting and reshaping.
        Computed with ``getdims(data)`` when omitted. The object passed
        in is never modified.

    sorting : Boolean, optional, default = False
        Whether to sort the RDD before packing

    axis : int, optional, default = None
        Which axis to do maximum projection along

    Returns
    -------
    result : array
        A local numpy array with the RDD contents (values are cast
        to float16)

    Raises
    ------
    IndexError
        If ``axis`` does not refer to one of the key dimensions

    """
    from copy import copy

    if dims is None:
        dims = getdims(data)

    if axis is not None:
        nkeys = len(data.first()[0])
        # negative values are rejected too: they match no key position, so
        # the projection would silently be skipped
        if not 0 <= axis < nkeys:
            raise IndexError(
                'only %g keys, cannot compute maximum along axis %g' %
                (nkeys, axis))
        # boolean mask selecting every key dimension except the projected one
        keep = arange(0, nkeys) != axis
        data = data.map(
            lambda kv: (tuple(array(kv[0])[keep]), kv[1])).reduceByKey(maximum)
        # work on a copy so the caller's Dimensions object keeps all its axes
        dims = copy(dims)
        dims.min = list(array(dims.min)[keep])
        dims.max = list(array(dims.max)[keep])
        sorting = True  # will always need to sort because reduceByKey changes order

    # index-based lambdas: tuple parameters are a syntax error on Python 3
    if ind is None:
        result = data.map(lambda kv: float16(kv[1])).collect()
        nout = size(result[0])
    else:
        result = data.map(lambda kv: float16(kv[1][ind])).collect()
        nout = size(ind)

    if sorting:
        data = subtoind(data, dims.max)
        keys = data.map(lambda kv: int(kv[0])).collect()
        # sort on the linear index only, so values are never compared
        ordered = sorted(zip(keys, result), key=lambda kv: kv[0])
        result = array([v for (_, v) in ordered])

    # reshape into a dense array of shape (b, x, y, z)  or (b, x, y) or (b, x)
    # where b is the number of outputs per record
    out = transpose(reshape(result, ((nout, ) + dims.count())[::-1]))

    # flip xy for spatial data
    nspatial = size(dims.count())
    if nspatial == 3:  # (b, x, y, z) -> (b, y, x, z)
        out = out.transpose([0, 2, 1, 3])
    elif nspatial == 2:  # (b, x, y) -> (b, y, x)
        out = out.transpose([0, 2, 1])

    return squeeze(out)
Exemplo n.º 4
0
def save(data,
         outputdir,
         outputfile,
         outputformat,
         sorting=False,
         dimsmax=None,
         dimsmin=None):
    """
    Save data to a variety of formats
    Automatically determines whether data is an array
    or an RDD and handle appropriately

    For an RDD whose records hold more than one value, one file is
    written per value, with "-<index>" appended to the file name.

    Parameters
    ----------
    data : RDD of (tuple, array) pairs, or numpy array
        The data to save

    outputdir : str
        Output directory, created if it does not exist

    outputfile : str
        Output filename (without extension)

    outputformat : str
        Output format ("matlab", "text", or "image")

    sorting : Boolean, optional, default = False
        Passed to ``pack`` when data is an RDD

    dimsmax : tuple, optional, default = None
        Maximum key values, used for RDD input instead of computing
        the dimensions with ``getdims``

    dimsmin : tuple, optional, default = None
        Minimum key values; only valid together with dimsmax, and
        defaults to 1 along every dimension of dimsmax

    Raises
    ------
    ValueError
        If outputformat is not one of the supported formats, or if
        dimsmin is given without dimsmax for RDD input
    """

    # reject unknown formats up front instead of silently writing nothing
    if outputformat not in ("matlab", "text", "image"):
        raise ValueError(
            'outputformat must be "matlab", "text", or "image", got %r'
            % (outputformat,))

    if not os.path.exists(outputdir):
        os.makedirs(outputdir)

    filename = os.path.join(outputdir, outputfile)

    def write_array(values, path, varname):
        # write one local array in the requested format; varname is the
        # variable name stored in the .mat file and unused otherwise
        if outputformat == "matlab":
            savemat(path + ".mat",
                    mdict={varname: values},
                    oned_as='column',
                    do_compression=True)
        elif outputformat == "text":
            savetxt(path + ".txt", values, fmt="%.6f")
        else:
            arraytoim(values, path)

    if not isrdd(data):
        write_array(data, filename, outputfile)
        return

    nout = size(data.first()[1])
    if dimsmax is not None:
        dims = Dimensions()
        dims.max = dimsmax
        if dimsmin is not None:
            dims.min = dimsmin
        else:
            # match the dimensionality of dimsmax rather than assuming 3
            dims.min = (1, ) * len(dimsmax)
    elif dimsmin is not None:
        raise ValueError('cannot provide dimsmin without dimsmax')
    else:
        dims = getdims(data)

    if outputformat == "image":
        data = rescale(data)

    if nout > 1:
        for iout in range(0, nout):
            result = pack(data, ind=iout, dims=dims, sorting=sorting)
            write_array(result,
                        filename + "-" + str(iout),
                        outputfile + str(iout))
    else:
        result = pack(data, dims=dims, sorting=sorting)
        write_array(result, filename, outputfile)
Exemplo n.º 5
0
def save(data, outputdir, outputfile, outputformat, sorting=False, dimsmax=None, dimsmin=None):
    """
    Save data to a variety of formats
    Automatically determines whether data is an array
    or an RDD and handle appropriately

    For an RDD whose records hold more than one value, one file is
    written per value, with "-<index>" appended to the file name.

    Parameters
    ----------
    data : RDD of (tuple, array) pairs, or numpy array
        The data to save

    outputdir : str
        Output directory, created if it does not exist

    outputfile : str
        Output filename (without extension)

    outputformat : str
        Output format ("matlab", "text", or "image")

    sorting : Boolean, optional, default = False
        Passed to ``pack`` when data is an RDD

    dimsmax : tuple, optional, default = None
        Maximum key values, used for RDD input instead of computing
        the dimensions with ``getdims``

    dimsmin : tuple, optional, default = None
        Minimum key values; only valid together with dimsmax, and
        defaults to 1 along every dimension of dimsmax

    Raises
    ------
    ValueError
        If outputformat is not one of the supported formats, or if
        dimsmin is given without dimsmax for RDD input
    """

    # an unsupported format used to fall through every branch and save nothing
    if outputformat not in ("matlab", "text", "image"):
        raise ValueError('outputformat must be "matlab", "text", or "image", got %r' % (outputformat,))

    if not os.path.exists(outputdir):
        os.makedirs(outputdir)

    filename = os.path.join(outputdir, outputfile)

    def write_array(values, path, varname):
        # write one local array in the requested format; varname is the
        # variable name stored in the .mat file and unused otherwise
        if outputformat == "matlab":
            savemat(path+".mat", mdict={varname: values}, oned_as='column', do_compression=True)
        elif outputformat == "text":
            savetxt(path+".txt", values, fmt="%.6f")
        else:
            arraytoim(values, path)

    if not isrdd(data):
        write_array(data, filename, outputfile)
        return

    nout = size(data.first()[1])
    if dimsmax is not None:
        dims = Dimensions()
        dims.max = dimsmax
        if dimsmin is not None:
            dims.min = dimsmin
        else:
            # one entry per dimension of dimsmax instead of a fixed 3-tuple
            dims.min = (1,) * len(dimsmax)
    elif dimsmin is not None:
        raise ValueError('cannot provide dimsmin without dimsmax')
    else:
        dims = getdims(data)

    if outputformat == "image":
        data = rescale(data)

    if nout > 1:
        for iout in range(0, nout):
            result = pack(data, ind=iout, dims=dims, sorting=sorting)
            write_array(result, filename+"-"+str(iout), outputfile+str(iout))
    else:
        result = pack(data, dims=dims, sorting=sorting)
        write_array(result, filename, outputfile)
Exemplo n.º 6
0
def pack(data, ind=None, dims=None, sorting=False, axis=None):
    """Pack an RDD into a dense local array, with options for
    sorting, reshaping, and projecting based on keys

    Parameters
    ----------
    data : RDD of (tuple, array) pairs
        The data to pack into a local array

    ind : int, optional, default = None
        An index, if each record has multiple entries

    dims : Dimensions, optional, default = None
        Dimensions of the keys, for use with sorting and reshaping.
        Computed with ``getdims(data)`` when omitted. The object passed
        in is never modified.

    sorting : Boolean, optional, default = False
        Whether to sort the RDD before packing

    axis : int, optional, default = None
        Which axis to do maximum projection along

    Returns
    -------
    result : array
        A local numpy array with the RDD contents (values are cast
        to float16)

    Raises
    ------
    IndexError
        If ``axis`` does not refer to one of the key dimensions

    """
    from copy import copy

    if dims is None:
        dims = getdims(data)

    if axis is not None:
        nkeys = len(data.first()[0])
        # a negative axis matches no key position either, so it is rejected
        # here instead of silently skipping the projection
        if not 0 <= axis < nkeys:
            raise IndexError('only %g keys, cannot compute maximum along axis %g' % (nkeys, axis))
        # True for every key dimension that survives the projection
        keep = arange(0, nkeys) != axis
        data = data.map(lambda kv: (tuple(array(kv[0])[keep]), kv[1])).reduceByKey(maximum)
        # copy first: the caller's Dimensions object must keep all of its axes
        dims = copy(dims)
        dims.min = list(array(dims.min)[keep])
        dims.max = list(array(dims.max)[keep])
        sorting = True  # will always need to sort because reduceByKey changes order

    # records are indexed rather than unpacked in the lambda signature,
    # which is not valid syntax on Python 3
    if ind is None:
        result = data.map(lambda kv: float16(kv[1])).collect()
        nout = size(result[0])
    else:
        result = data.map(lambda kv: float16(kv[1][ind])).collect()
        nout = size(ind)

    if sorting:
        data = subtoind(data, dims.max)
        keys = data.map(lambda kv: int(kv[0])).collect()
        # order by linear index alone; the values themselves are never compared
        ordered = sorted(zip(keys, result), key=lambda kv: kv[0])
        result = array([v for (_, v) in ordered])

    # reshape into a dense array of shape (b, x, y, z)  or (b, x, y) or (b, x)
    # where b is the number of outputs per record
    out = transpose(reshape(result, ((nout,) + dims.count())[::-1]))

    # flip xy for spatial data
    nspatial = size(dims.count())
    if nspatial == 3:  # (b, x, y, z) -> (b, y, x, z)
        out = out.transpose([0, 2, 1, 3])
    elif nspatial == 2:  # (b, x, y) -> (b, y, x)
        out = out.transpose([0, 2, 1])

    return squeeze(out)