Пример #1
0
def savez(file, *args, **kwds):
    """Save several arrays into a single, uncompressed ``.npz`` archive.

    Each array is first written as a ``.npy`` file inside a temporary
    directory created for this call, then copied into the archive.  The
    original author notes the per-call directory is needed for the function
    to work when used in parallel.

    Parameters
    ----------
    file : str or file-like
        Destination archive.  When a string is given, ``.npz`` is appended
        unless the name already ends with it.
    *args
        Arrays stored under the automatic names ``arr_0``, ``arr_1``, ...
    **kwds
        Arrays stored under their keyword names.

    Raises
    ------
    ValueError
        If a keyword collides with an automatic ``arr_N`` name.
    """
    import tempfile
    import zipfile
    from numpy.lib import format

    if isinstance(file, basestring):
        if not file.endswith('.npz'):
            file = file + '.npz'

    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict:
            raise ValueError(
                "Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val

    archive = zipfile.ZipFile(file, mode="w")
    try:
        # Place to write temporary .npy files before storing them in the zip.
        direc = tempfile.mkdtemp()
        try:
            for key, val in namedict.items():
                fname = key + '.npy'
                filename = os.path.join(direc, fname)
                with open(filename, 'wb') as fid:
                    format.write_array(fid, numpy.asanyarray(val))
                # The staged file is closed before the archive reads it.
                archive.write(filename, arcname=fname)
        finally:
            # Remove the staging directory even when writing failed.
            shutil.rmtree(direc)
    finally:
        archive.close()
Пример #2
0
def _pickle_array(arr):
    """Serialize ``arr`` to bytes using the ``.npy`` writer.

    The array is viewed as a plain ``np.ndarray`` before being written into
    an in-memory buffer; the buffer's contents are returned.
    """
    stream = BytesIO()
    write_array(stream, arr.view(np.ndarray))
    return stream.getvalue()
Пример #3
0
def roundtrip_truncated(arr):
    """Write ``arr`` in ``.npy`` format, drop the last byte, and read it back.

    Returns whatever ``format.read_array`` produces for the shortened stream.
    """
    complete = BytesIO()
    format.write_array(complete, arr)
    # The reader is handed a stream that is one byte short.
    clipped = BytesIO(complete.getvalue()[:-1])
    return format.read_array(clipped)
Пример #4
0
def test_memmap_roundtrip():
    """Check that ``open_memmap`` produces the same file as ``write_array``.

    Generator-style test: for every array in ``basic_arrays + record_arrays``
    that has no object dtype, it yields one ``assert_equal_`` check comparing
    the raw bytes of the two files.  Nothing is yielded on win32/cygwin.
    """
    # Fixme: test crashes nose on windows.
    if sys.platform in ('win32', 'cygwin'):
        return
    for arr in basic_arrays + record_arrays:
        if arr.dtype.hasobject:
            # Skip these since they can't be mmap'ed.
            continue
        # Write it out normally and through mmap.
        nfn = os.path.join(tempdir, 'normal.npy')
        mfn = os.path.join(tempdir, 'memmap.npy')
        with open(nfn, 'wb') as fp:
            format.write_array(fp, arr)

        fortran_order = (
            arr.flags.f_contiguous and not arr.flags.c_contiguous)
        ma = format.open_memmap(mfn, mode='w+', dtype=arr.dtype,
                                shape=arr.shape, fortran_order=fortran_order)
        ma[...] = arr
        # Drop the reference before reading the file back (presumably so the
        # mapping is released first).
        del ma

        # Check that both of these files' contents are the same.  Context
        # managers make sure the handles are closed even if a read fails.
        with open(nfn, 'rb') as fp:
            normal_bytes = fp.read()
        with open(mfn, 'rb') as fp:
            memmap_bytes = fp.read()
        yield assert_equal_, normal_bytes, memmap_bytes

        # Check that reading the file using memmap works.
        ma = format.open_memmap(nfn, mode='r')
        del ma
Пример #5
0
def test_memmap_roundtrip():
    """Check that ``open_memmap`` produces the same file as ``write_array``.

    Generator-style test: for every array in ``basic_arrays + record_arrays``
    that has no object dtype, it yields one ``assert_equal`` check comparing
    the raw bytes of the two files.  Nothing is yielded on win32/cygwin.
    """
    # XXX: test crashes nose on windows. Fix this
    if sys.platform in ("win32", "cygwin"):
        return
    for arr in basic_arrays + record_arrays:
        if arr.dtype.hasobject:
            # Skip these since they can't be mmap'ed.
            continue
        # Write it out normally and through mmap.
        nfn = os.path.join(tempdir, "normal.npy")
        mfn = os.path.join(tempdir, "memmap.npy")
        with open(nfn, "wb") as fp:
            format.write_array(fp, arr)

        fortran_order = arr.flags.f_contiguous and not arr.flags.c_contiguous
        ma = format.open_memmap(mfn, mode="w+", dtype=arr.dtype, shape=arr.shape, fortran_order=fortran_order)
        ma[...] = arr
        del ma

        # Check that both of these files' contents are the same.  Context
        # managers make sure the handles are closed even if a read fails.
        with open(nfn, "rb") as fp:
            normal_bytes = fp.read()
        with open(mfn, "rb") as fp:
            memmap_bytes = fp.read()
        yield assert_equal, normal_bytes, memmap_bytes

        # Check that reading the file using memmap works.
        # NOTE: only opening is exercised; the element-wise comparison of the
        # mapped array against `arr` is currently disabled.
        ma = format.open_memmap(nfn, mode="r")
        del ma
Пример #6
0
    def savez(self, *args, **kwds):
        """Add arrays to the open archive as ``.npy`` members.

        Positional arrays are named ``arr_<n>`` from the running counter
        ``self.i``, which advances once per positional array; keyword arrays
        keep their keyword as name.  Every array is staged in
        ``self.tmpfile`` and then copied into ``self.zip``.  The staging file
        is removed before returning, also when an error occurs.

        Raises
        ------
        ValueError
            If a keyword collides with the next automatic ``arr_<n>`` name.
        """
        import os
        import numpy.lib.format as format

        namedict = kwds
        for val in args:
            key = 'arr_%d' % self.i
            if key in namedict:
                raise ValueError(
                    "Cannot use un-named variables and keyword %s" % key)
            namedict[key] = val
            self.i += 1

        try:
            # items() exists on both Python 2 and 3; iteritems() is 2-only.
            for key, val in namedict.items():
                with open(self.tmpfile, 'wb') as fid:
                    format.write_array(fid, np.asanyarray(val))
                # The staging file is closed before the archive reads it.
                self.zip.write(self.tmpfile, arcname=key + '.npy')
        finally:
            os.remove(self.tmpfile)
Пример #7
0
def test_read_array_header_2_0():
    """The header of a version-2.0 stream reports shape, order and dtype."""
    stream = BytesIO()
    format.write_array(stream, np.ones((3, 6), dtype=float), version=(2, 0))

    # Position the stream just past the magic string before parsing.
    stream.seek(format.MAGIC_LEN)
    parsed = format.read_array_header_2_0(stream)
    shape, fortran, dtype = parsed

    expected = ((3, 6), False, float)
    assert_((shape, fortran, dtype) == expected)
Пример #8
0
def test_read_array_header_2_0():
    """A version-2.0 header parses correctly and ends on an aligned offset."""
    stream = BytesIO()
    format.write_array(stream, np.ones((3, 6), dtype=float), version=(2, 0))

    # Position the stream just past the magic string before parsing.
    stream.seek(format.MAGIC_LEN)
    parsed = format.read_array_header_2_0(stream)
    shape, fortran, dtype = parsed

    # After the header, the stream offset is a multiple of ARRAY_ALIGN.
    offset = stream.tell()
    assert_(offset % format.ARRAY_ALIGN == 0)
    expected = ((3, 6), False, float)
    assert_((shape, fortran, dtype) == expected)
Пример #9
0
def save(file, iarray, metafile=None, version=(1, 0)):
    """Save an info array to a .npy file and a metadata file.

    Similar to the numpy.save function.

    Parameters
    ----------
    file: file handle or str
        File or file name to write the array to in .npy format.  A file
        opened here from a name is closed before returning; a handle passed
        in by the caller is left open.
    iarray: InfoArray object or array with similar interface
        Array to be written to file with meta data.
    metafile: str
        File name for the meta data.  The `info` attribute of `iarray` will be
        written here. Default is None, in which case it is assumed to be the
        file name associated with `file` with ".meta" appended.
    version: tuple
        .npy format version.  Only (1, 0) is accepted.

    Raises
    ------
    ValueError
        If `version` is not (1, 0), or if ``repr(iarray.info)`` makes
        `safe_eval` raise a SyntaxError.
    """

    # Restrict to version (1,0) because we've only written write_header for
    # this version.
    if version != (1, 0):
        raise ValueError("Only version (1,0) is safe from this function.")

    # Make sure that the meta data will be representable as a string.
    infostring = repr(iarray.info)
    try:
        safe_eval(infostring)
    except SyntaxError:
        raise ValueError("The `info` attribute of `iarray` is not "
                         "representable as an evaluable string.")

    # Save the array in .npy format.
    opened_here = isinstance(file, basestring)
    if opened_here:
        fid = open(file, "wb")
    else:
        fid = file
    try:
        npfor.write_array(fid, iarray, version=version)
    finally:
        # Only close what this function opened; the caller owns its handle.
        if opened_here:
            fid.close()

    # Figure out what the filename for the meta data should be.
    if metafile is None:
        try:
            fname = file.name
        except AttributeError:
            fname = file
        metafile = fname + ".meta"

    # Save the meta data.
    with open(metafile, 'w') as info_fid:
        info_fid.write(infostring)
Пример #10
0
def save(file, iarray, metafile=None, version=(1, 0)):
    """Save an info array to a .npy file and a metadata file.

    Similar to the numpy.save function.

    Parameters
    ----------
    file: file handle or str
        File or file name to write the array to in .npy format.  A file
        opened here from a name is closed before returning; a handle passed
        in by the caller is left open.
    iarray: InfoArray object or array with similar interface
        Array to be written to file with meta data.
    metafile: str
        File name for the meta data.  The `info` attribute of `iarray` will be
        written here. Default is None, in which case it is assumed to be the
        file name associated with `file` with ".meta" appended.
    version: tuple
        .npy format version.  Only (1, 0) is accepted.

    Raises
    ------
    ValueError
        If `version` is not (1, 0), or if ``repr(iarray.info)`` makes
        `safe_eval` raise a SyntaxError.
    """

    # Restrict to version (1,0) because we've only written write_header for
    # this version.
    if version != (1, 0):
        raise ValueError("Only version (1,0) is safe from this function.")

    # Make sure that the meta data will be representable as a string.
    infostring = repr(iarray.info)
    try:
        safe_eval(infostring)
    except SyntaxError:
        raise ValueError("The `info` attribute of `iarray` is not "
                         "representable as an evaluable string.")

    # Save the array in .npy format.
    owns_handle = isinstance(file, basestring)
    fid = open(file, "wb") if owns_handle else file
    try:
        npfor.write_array(fid, iarray, version=version)
    finally:
        # A handle supplied by the caller stays open; ours must not leak.
        if owns_handle:
            fid.close()

    # Figure out what the filename for the meta data should be.
    if metafile is None:
        try:
            fname = file.name
        except AttributeError:
            fname = file
        metafile = fname + ".meta"

    # Save the meta data.
    with open(metafile, 'w') as info_fid:
        info_fid.write(infostring)
Пример #11
0
def _savez(file, args, kwds, compress):
    """Write arrays into an ``.npz`` archive, optionally deflate-compressed.

    Parameters
    ----------
    file : str or file-like
        Destination archive.  For a string, ``.npz`` is appended unless the
        name already ends with it.
    args : sequence
        Arrays stored under the automatic names ``arr_0``, ``arr_1``, ...
    kwds : dict
        Arrays stored under their keys.  The dict is updated in place with
        the automatically named positional arrays.
    compress : bool
        Use ``ZIP_DEFLATED`` when true, ``ZIP_STORED`` otherwise.

    Raises
    ------
    ValueError
        If a key in `kwds` collides with an automatic ``arr_N`` name.
    """
    # Import is postponed to here since zipfile depends on gzip, an optional
    # component of the so-called standard library.
    import zipfile

    # Import deferred for startup time improvement
    import tempfile

    if isinstance(file, basestring):
        if not file.endswith(".npz"):
            file = file + ".npz"

    namedict = kwds
    for i, val in enumerate(args):
        key = "arr_%d" % i
        if key in namedict:
            msg = "Cannot use un-named variables and keyword %s" % key
            raise ValueError(msg)
        namedict[key] = val

    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED

    archive = zipfile_factory(file, mode="w", compression=compression)
    try:
        # Stage arrays in a temporary file on disk, before writing to zip.
        fd, tmpfile = tempfile.mkstemp(suffix="-numpy.npy")
        os.close(fd)
        try:
            for key, val in namedict.items():
                with open(tmpfile, "wb") as fid:
                    format.write_array(fid, numpy.asanyarray(val))
                # The staging file is closed before the archive reads it.
                archive.write(tmpfile, arcname=key + ".npy")
        finally:
            os.remove(tmpfile)
    finally:
        # Close the archive even when staging or writing failed.
        archive.close()
Пример #12
0
def _savez(file, args, kwds, compress):
    """Write arrays into an ``.npz`` archive, optionally deflate-compressed.

    Parameters
    ----------
    file : str or file-like
        Destination archive.  For a string, ``.npz`` is appended unless the
        name already ends with it.
    args : sequence
        Arrays stored under the automatic names ``arr_0``, ``arr_1``, ...
    kwds : dict
        Arrays stored under their keys.  The dict is updated in place with
        the automatically named positional arrays.
    compress : bool
        Use ``ZIP_DEFLATED`` when true, ``ZIP_STORED`` otherwise.

    Raises
    ------
    ValueError
        If a key in `kwds` collides with an automatic ``arr_N`` name.
    """
    # Import is postponed to here since zipfile depends on gzip, an optional
    # component of the so-called standard library.
    import zipfile
    # Import deferred for startup time improvement
    import tempfile

    if isinstance(file, basestring):
        if not file.endswith('.npz'):
            file = file + '.npz'

    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict:
            msg = "Cannot use un-named variables and keyword %s" % key
            raise ValueError(msg)
        namedict[key] = val

    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED

    archive = zipfile_factory(file, mode="w", compression=compression)
    try:
        # Stage arrays in a temporary file on disk, before writing to zip.
        fd, tmpfile = tempfile.mkstemp(suffix='-numpy.npy')
        os.close(fd)
        try:
            for key, val in namedict.items():
                with open(tmpfile, 'wb') as fid:
                    format.write_array(fid, numpy.asanyarray(val))
                # The staging file is closed before the archive reads it.
                archive.write(tmpfile, arcname=key + '.npy')
        finally:
            os.remove(tmpfile)
    finally:
        # Close the archive even when staging or writing failed.
        archive.close()
Пример #13
0
def test_version_2_0():
    f = BytesIO()
    # requires more than 2 byte for header
    dt = [(("%d" % i) * 100, float) for i in range(500)]
    d = np.ones(1000, dtype=dt)

    format.write_array(f, d, version=(2, 0))
    with warnings.catch_warnings(record=True) as w:
        warnings.filterwarnings('always', '', UserWarning)
        format.write_array(f, d)
        assert_(w[0].category is UserWarning)

    f.seek(0)
    n = format.read_array(f)
    assert_array_equal(d, n)

    # 1.0 requested but data cannot be saved this way
    assert_raises(ValueError, format.write_array, f, d, (1, 0))
Пример #14
0
def test_read_magic():
    """``read_magic`` returns the written version and stops after the magic."""
    versions = [(1, 0), (2, 0)]
    streams = [BytesIO() for _ in versions]

    arr = np.ones((3, 6), dtype=float)

    for stream, version in zip(streams, versions):
        format.write_array(stream, arr, version=version)

    for stream in streams:
        stream.seek(0)

    detected = [format.read_magic(stream) for stream in streams]

    for found, written in zip(detected, versions):
        assert_(found == written)

    # Only the magic prefix may have been consumed from each stream.
    for stream in streams:
        assert_(stream.tell() == format.MAGIC_LEN)
Пример #15
0
def test_unicode_field_names():
    """A structured array with a non-ASCII field name round-trips as 3.0."""
    # gh-7391
    field_types = [
        ('int', int),
        (u'\N{CJK UNIFIED IDEOGRAPH-6574}\N{CJK UNIFIED IDEOGRAPH-5F62}', int),
    ]
    rows = [(1, 3), (1, 2), (1, 3), (1, 2)]
    arr = np.array(rows, dtype=field_types)
    fname = os.path.join(tempdir, "unicode.npy")

    with open(fname, 'wb') as sink:
        format.write_array(sink, arr, version=(3, 0))
    with open(fname, 'rb') as source:
        loaded = format.read_array(source)
    assert_array_equal(arr, loaded)

    # notifies the user that 3.0 is selected
    with open(fname, 'wb') as sink, assert_warns(UserWarning):
        format.write_array(sink, arr, version=None)
Пример #16
0
def savez(file, version, *args, compress=True, **kwargs):
    """Append named values to a ZIP archive, picking a format per value.

    A ``version`` member holding ``str(version)`` is written first.  Then,
    for every value: ``str``/``bytes`` are stored verbatim under their key;
    values that ``json.dumps`` accepts are stored as indented JSON text under
    their key; values whose type lives in module ``pandas.core.frame`` are
    written as ``<key>.parquet``; anything else goes through
    ``np.asanyarray`` and is written as ``<key>.npy`` with pickling disabled.

    Positional values are named ``arr_0``, ``arr_1``, ...; a ``ValueError``
    is raised when such a name is already used as a keyword.
    """
    namedict = kwargs
    for position, positional in enumerate(args):
        auto_name = 'arr_%d' % position
        if auto_name in namedict.keys():
            raise ValueError("Cannot use un-named variables and keyword %s" %
                             auto_name)
        namedict[auto_name] = positional

    compression = zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED

    with zipfile_factory(file, mode="a", compression=compression) as zipf:

        def store_binary(name, value):
            # Fallback for values json cannot encode: parquet for pandas
            # frames, .npy for everything else.
            if type(value).__module__ == 'pandas.core.frame':
                member = name + '.parquet'
                needs_zip64 = value.values.nbytes >= 2**30
                with zipf.open(member, 'w',
                               force_zip64=needs_zip64) as fid:
                    pq.write_table(pa.Table.from_pandas(value), fid)
                return
            member = name + '.npy'
            value = np.asanyarray(value)
            needs_zip64 = value.nbytes >= 2**30
            with zipf.open(member, 'w', force_zip64=needs_zip64) as fid:
                format.write_array(fid, value, allow_pickle=False)

        # Write file format version
        zipf.writestr('version', str(version))

        # Write directly to a ZIP file
        for key, val in namedict.items():
            if isinstance(val, (str, bytes)):
                zipf.writestr(key, val)
                continue
            try:
                text = json.dumps(val, indent=4)
            except TypeError:
                store_binary(key, val)
            else:
                zipf.writestr(key, text)
Пример #17
0
    def savez(self, *args, **kwds):
        """Add arrays to the open archive as ``.npy`` members.

        Positional arrays are named ``arr_<n>`` from the running counter
        ``self._i``, which advances once per positional array; keyword arrays
        keep their keyword as name.  Every array is staged in
        ``self.tmpfile`` (written with ``allow_pickle=True``) and then copied
        into ``self.zip``.  The staging file is removed before returning,
        also when an error occurs.

        Raises
        ------
        ValueError
            If a keyword collides with the next automatic ``arr_<n>`` name.
        """
        import os
        import numpy.lib.format as fmt

        namedict = kwds
        for val in args:
            key = 'arr_%d' % self._i
            if key in namedict:
                raise ValueError("Cannot use un-named variables and keyword %s" % key)
            namedict[key] = val
            self._i += 1

        try:
            for key, val in namedict.items():
                fname = key + '.npy'
                # Open the staging file exactly once; a second, unmanaged
                # open() here would leave a handle that is never closed.
                with open(self.tmpfile, 'wb') as fid:
                    fmt.write_array(fid, np.asanyarray(val), allow_pickle=True)
                self.zip.write(self.tmpfile, arcname=fname)
        finally:
            os.remove(self.tmpfile)
Пример #18
0
def test_write_version_1_0():
    f = StringIO()
    arr = np.arange(1)
    # These should pass.
    format.write_array(f, arr, version=(1, 0))
    format.write_array(f, arr)

    # These should all fail.
    bad_versions = [
        (1, 1),
        (0, 0),
        (0, 1),
        (2, 0),
        (2, 2),
        (255, 255),
    ]
    for version in bad_versions:
        try:
            format.write_array(f, arr, version=version)
        except ValueError:
            pass
        else:
            raise AssertionError(
                "we should have raised a ValueError for the bad version %r" %
                (version, ))
Пример #19
0
def _savez(file, args, kwds, compress):
    """Write arrays into an ``.npz`` archive, optionally deflate-compressed.

    Parameters
    ----------
    file : str or file-like
        Destination archive.  For a string, ``.npz`` is appended unless the
        name already ends with it.
    args : sequence
        Arrays stored under the automatic names ``arr_0``, ``arr_1``, ...
    kwds : dict
        Arrays stored under their keys.  The dict is updated in place with
        the automatically named positional arrays.
    compress : bool
        Use ``ZIP_DEFLATED`` when true, ``ZIP_STORED`` otherwise.

    Raises
    ------
    ValueError
        If a key in `kwds` collides with an automatic ``arr_N`` name.
    """
    if isinstance(file, basestring):
        if not file.endswith('.npz'):
            file = file + '.npz'

    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict:
            msg = "Cannot use un-named variables and keyword %s" % key
            raise ValueError(msg)
        namedict[key] = val

    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED

    archive = zipfile_factory(file, mode="w", compression=compression)
    try:
        # Stage arrays in a temporary file on disk, before writing to zip.
        fd, tmpfile = tempfile.mkstemp(suffix='-numpy.npy')
        os.close(fd)
        try:
            for key, val in namedict.items():
                with open(tmpfile, 'wb') as fid:
                    format.write_array(fid, numpy.asanyarray(val))
                # The staging file is closed before the archive reads it.
                archive.write(tmpfile, arcname=key + '.npy')
        finally:
            os.remove(tmpfile)
    finally:
        # Close the archive even when staging or writing failed.
        archive.close()
Пример #20
0
def test_memmap_roundtrip():
    """Check that ``open_memmap`` produces the same file as ``write_array``.

    Generator-style test: for every array in ``basic_arrays + record_arrays``
    that has no object dtype, it yields one ``assert_equal`` check comparing
    the raw bytes of the two files.  Nothing is yielded on win32/cygwin.
    """
    # XXX: test crashes nose on windows. Fix this
    if sys.platform in ('win32', 'cygwin'):
        return
    for arr in basic_arrays + record_arrays:
        if arr.dtype.hasobject:
            # Skip these since they can't be mmap'ed.
            continue
        # Write it out normally and through mmap.
        nfn = os.path.join(tempdir, 'normal.npy')
        mfn = os.path.join(tempdir, 'memmap.npy')
        with open(nfn, 'wb') as fp:
            format.write_array(fp, arr)

        fortran_order = (arr.flags.f_contiguous
                         and not arr.flags.c_contiguous)
        ma = format.open_memmap(mfn,
                                mode='w+',
                                dtype=arr.dtype,
                                shape=arr.shape,
                                fortran_order=fortran_order)
        ma[...] = arr
        del ma

        # Check that both of these files' contents are the same.  Context
        # managers make sure the handles are closed even if a read fails.
        with open(nfn, 'rb') as fp:
            normal_bytes = fp.read()
        with open(mfn, 'rb') as fp:
            memmap_bytes = fp.read()
        yield assert_equal, normal_bytes, memmap_bytes

        # Check that reading the file using memmap works.
        # NOTE: only opening is exercised; the element-wise comparison of the
        # mapped array against `arr` is currently disabled.
        ma = format.open_memmap(nfn, mode='r')
        del ma
Пример #21
0
def test_memmap_roundtrip():
    """Check that ``open_memmap`` produces the same file as ``write_array``.

    For every array in ``basic_arrays + record_arrays`` that has no object
    dtype, the array is written once with ``write_array`` and once through a
    writable memmap, and the raw bytes of both files are compared with
    ``assert_equal_``.  The whole test is a no-op on win32/cygwin.
    """
    # Fixme: used to crash on windows
    if sys.platform in ("win32", "cygwin"):
        return
    for arr in basic_arrays + record_arrays:
        if arr.dtype.hasobject:
            # Skip these since they can't be mmap'ed.
            continue
        # Write it out normally and through mmap.
        nfn = os.path.join(tempdir, "normal.npy")
        mfn = os.path.join(tempdir, "memmap.npy")
        with open(nfn, "wb") as fp:
            format.write_array(fp, arr)

        fortran_order = arr.flags.f_contiguous and not arr.flags.c_contiguous
        ma = format.open_memmap(
            mfn,
            mode="w+",
            dtype=arr.dtype,
            shape=arr.shape,
            fortran_order=fortran_order,
        )
        ma[...] = arr
        del ma

        # Check that both of these files' contents are the same.  Context
        # managers make sure the handles are closed even if a read fails.
        with open(nfn, "rb") as fp:
            normal_bytes = fp.read()
        with open(mfn, "rb") as fp:
            memmap_bytes = fp.read()
        assert_equal_(normal_bytes, memmap_bytes)

        # Check that reading the file using memmap works.
        ma = format.open_memmap(nfn, mode="r")
        del ma
Пример #22
0
def test_version_2_0():
    f = BytesIO()
    # requires more than 2 byte for header
    dt = [(("%d" % i) * 100, float) for i in range(500)]
    d = np.ones(1000, dtype=dt)

    format.write_array(f, d, version=(2, 0))
    with warnings.catch_warnings(record=True) as w:
        warnings.filterwarnings("always", "", UserWarning)
        format.write_array(f, d)
        assert_(w[0].category is UserWarning)

    # check alignment of data portion
    f.seek(0)
    header = f.readline()
    assert_(len(header) % format.ARRAY_ALIGN == 0)

    f.seek(0)
    n = format.read_array(f)
    assert_array_equal(d, n)

    # 1.0 requested but data cannot be saved this way
    assert_raises(ValueError, format.write_array, f, d, (1, 0))
Пример #23
0
def test_memmap_roundtrip():
    """Check that ``open_memmap`` produces the same file as ``write_array``.

    For every array in ``basic_arrays + record_arrays`` that has no object
    dtype, the array is written once with ``write_array`` and once through a
    writable memmap, and the raw bytes of both files are compared with
    ``assert_equal_``.  The whole test is a no-op on win32/cygwin.
    """
    # Fixme: used to crash on windows
    if sys.platform in ('win32', 'cygwin'):
        return

    normal_path = os.path.join(tempdir, 'normal.npy')
    memmap_path = os.path.join(tempdir, 'memmap.npy')

    for arr in basic_arrays + record_arrays:
        # Object arrays are skipped since they can't be mmap'ed.
        if arr.dtype.hasobject:
            continue

        # Write it out normally and through mmap.
        with open(normal_path, 'wb') as sink:
            format.write_array(sink, arr)

        flags = arr.flags
        is_fortran = flags.f_contiguous and not flags.c_contiguous
        mapped = format.open_memmap(memmap_path, mode='w+', dtype=arr.dtype,
                                    shape=arr.shape, fortran_order=is_fortran)
        mapped[...] = arr
        del mapped
        if IS_PYPY:
            break_cycles()

        # Check that both of these files' contents are the same.
        with open(normal_path, 'rb') as source:
            normal_bytes = source.read()
        with open(memmap_path, 'rb') as source:
            memmap_bytes = source.read()
        assert_equal_(normal_bytes, memmap_bytes)

        # Check that reading the file using memmap works.
        mapped = format.open_memmap(normal_path, mode='r')
        del mapped
        if IS_PYPY:
            break_cycles()
Пример #24
0
def test_ensemble_matrix_json(client, simple_ensemble, get, post):
    from numpy.lib.format import write_array, read_array

    ensemble_id = simple_ensemble()

    matrix = np.random.rand(5, 8, 13)

    # POST
    post_url = f"/ensembles/{ensemble_id}/records/mat/matrix"
    if post == "json":
        resp = client.post(post_url, json=matrix.tolist())
    elif post == "numpy":
        stream = io.BytesIO()
        write_array(stream, matrix)
        resp = client.post(
            post_url,
            data=stream.getvalue(),
            headers={"content-type": "application/x-numpy"},
        )
    else:
        raise NotImplementedError()

    # GET
    get_url = f"/ensembles/{ensemble_id}/records/mat"
    if get == "json":
        resp = client.get(f"/ensembles/{ensemble_id}/records/mat")
        assert resp.json() == matrix.tolist()
    elif get == "numpy":
        resp = client.get(
            f"/ensembles/{ensemble_id}/records/mat",
            headers={"accept": "application/x-numpy"},
        )
        stream = io.BytesIO(resp.content)
        assert (read_array(stream) == matrix).all()
    else:
        raise NotImplementedError()
Пример #25
0
def test_write_version_1_0():
    f = StringIO()
    arr = np.arange(1)
    # These should pass.
    format.write_array(f, arr, version=(1, 0))
    format.write_array(f, arr)

    # These should all fail.
    bad_versions = [(1, 1), (0, 0), (0, 1), (2, 0), (2, 2), (255, 255)]
    for version in bad_versions:
        try:
            format.write_array(f, arr, version=version)
        except ValueError:
            pass
        else:
            raise AssertionError("we should have raised a ValueError for the bad version %r" % (version,))
Пример #26
0
def to_string(arr):
    """Return ``arr`` written in ``.npy`` format, as the buffer's contents."""
    buf = StringIO()
    format.write_array(buf, arr)
    return buf.getvalue()
Пример #27
0
def toSparse(source, idx2label, Format='NPZ'):
    """
    Convert intra-chromosomal contact matrices to sparse ones.

    For every intra-chromosomal entry of the source, the upper triangle of
    the matrix is taken and its non-zero cells are stored as a structured
    array with fields ``bin1``, ``bin2`` and ``IF``.  The output file name
    is derived from `source` by replacing ``.hm`` with ``-sparse.npz`` or
    ``-sparse.hm``.

    Parameters
    ----------
    source : str
         Hdf5 file name.

    idx2label : dict
        A dictionary for conversion between zero-based indices and
        string chromosome labels.

    Format : {'NPZ', 'HDF5'}
        Output format, case-insensitive. (Default: NPZ)

    Raises
    ------
    ValueError
        If `Format` is neither 'NPZ' nor 'HDF5'.

    """
    fmt = Format.upper()
    if fmt not in ('NPZ', 'HDF5'):
        raise ValueError("Format must be 'NPZ' or 'HDF5', got %r" % (Format,))

    lib = h5dict(source, mode='r')

    ## Uniform numpy-structured-array format
    # The builtin int/float are what the removed np.int/np.float aliases
    # pointed to, so the resulting dtype is unchanged.
    itype = np.dtype({
        'names': ['bin1', 'bin2', 'IF'],
        'formats': [int, int, float]
    })

    # Resources that exist only in the NPZ case; None means "not created".
    Zip = None
    tmpfile = None
    odict = None

    try:
        if fmt == 'NPZ':
            ## Create a Zip file in NPZ case
            output = source.replace('.hm', '-sparse.npz')
            Zip = zipfile.ZipFile(output, mode='w', allowZip64=True)
            fd, tmpfile = tempfile.mkstemp(suffix='-numpy.npy')
            os.close(fd)
        else:
            output = source.replace('.hm', '-sparse.hm')
            odict = h5dict(output)

        log.log(21, 'Sparse Matrices will be saved to %s', output)
        log.log(21, 'Only intra-chromosomal matrices will be taken into account')
        log.log(21, 'Coverting ...')

        for i in lib:
            # Keep only keys whose space-separated parts are all identical
            # (the intra-chromosomal entries), and skip 'resolution'.
            if (i != 'resolution') and (len(set(i.split())) == 1):
                # Used for the dict-like key
                key = idx2label[int(i.split()[0])]

                log.log(21, 'Chromosome %s ...', key)
                # 2D-Matrix
                H = lib[i]

                # Triangle Array
                Triu = np.triu(H)
                # Sparse Matrix in Memory
                x, y = np.nonzero(Triu)
                values = Triu[x, y]
                sparse = np.zeros(values.size, dtype=itype)
                sparse['bin1'] = x
                sparse['bin2'] = y
                sparse['IF'] = values

                if fmt == 'HDF5':
                    # Really Simple, just like a dictionary
                    odict[key] = sparse
                else:
                    # Much more complicated, but we need make a compatible
                    # file interface for other API
                    with open(tmpfile, 'wb') as fid:
                        write_array(fid, np.asanyarray(sparse))
                    # The staging file is closed before the zip reads it.
                    Zip.write(tmpfile, arcname=key + '.npy')
                log.log(21, 'Done!')
    finally:
        # Clean up only what was actually created, so the HDF5 path no
        # longer fails on undefined names and errors do not leak resources.
        if tmpfile is not None:
            os.remove(tmpfile)
        if Zip is not None:
            Zip.close()
Пример #28
0
def test_write_version():
    """Accepted format versions write cleanly; unsupported ones raise."""
    sink = BytesIO()
    arr = np.arange(1)

    # These should pass.  Each explicit version is followed by a write that
    # leaves the version unspecified.
    for accepted in ((1, 0), None, (2, 0)):
        format.write_array(sink, arr, version=accepted)
        format.write_array(sink, arr)

    # These should all fail.
    pattern = 'we only support format version.*'
    for rejected in [(1, 1), (0, 0), (0, 1), (2, 2), (255, 255)]:
        with assert_raises_regex(ValueError, pattern):
            format.write_array(sink, arr, version=rejected)
Пример #29
0
def _savez(file, args, kwds, compress):
    """Write arrays to an ``.npz`` file, with optional append/update modes.

    Parameters
    ----------
    file : str
        Archive file name; ``.npz`` is appended unless already present.
    args : sequence
        Arrays stored under the automatic names ``arr_0``, ``arr_1``, ...
    kwds : dict
        Arrays stored under their keys.  The dict is modified in place.
        Three keys are consumed as options rather than stored:

        - ``compress``: a true value forces ``ZIP_DEFLATED``.
        - ``append``: add the new arrays to an existing file; names already
          in the file are an error.
        - ``update``: rewrite an existing file, replacing arrays that are
          given again and keeping the others.
    compress : bool
        Use ``ZIP_DEFLATED`` when true, ``ZIP_STORED`` otherwise.

    Raises
    ------
    ValueError
        If a key collides with an automatic ``arr_N`` name, or if in append
        mode an array name already exists in the file.
    KeyError
        If both ``append`` and ``update`` are requested.

    Notes
    -----
    If `file` does not exist, ``append`` and ``update`` are ignored and a
    new file is written.  Without either option an existing file is
    overwritten.
    """
    # Import is postponed to here since zipfile depends on gzip, an optional
    # component of the so-called standard library.
    import zipfile
    import tempfile
    import shutil

    def _stage(zipf, tmpfile, key, val):
        # Write one array to the staging file, then copy it into the zip.
        with open(tmpfile, 'wb') as fid:
            format.write_array(fid, np.asanyarray(val))
        zipf.write(tmpfile, arcname=key + '.npy')

    if isinstance(file, basestring):
        if not file.endswith('.npz'):
            file = file + '.npz'

    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict:
            raise ValueError("Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val

    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED

    # use compress keyword if given; only active in savez
    if namedict.pop('compress', False):
        compression = zipfile.ZIP_DEFLATED

    # append or update
    appendit = namedict.pop('append', False)
    updateit = namedict.pop('update', False)
    if appendit and updateit:
        raise KeyError("append and update mutually exclusive.")

    # check if file exists, otherwise it will be a simple write
    if not os.path.isfile(file):
        appendit = False
        updateit = False
        inzipf = []
    else:
        zipf = zipfile.ZipFile(file, mode="r")
        try:
            # Strip the 4-character extension to recover the array names.
            inzipf = [name[:-4] for name in zipf.namelist()]
        finally:
            zipf.close()

    # append if keyword is True
    if appendit:
        mode = "a"
        # check if new arrays already exist in zip file
        for key in namedict:
            if key in inzipf:
                raise ValueError("array name already in npz-file: %s" % key)
    else:
        mode = "w"

    if not updateit:
        # Just add arrays to existing or non-existing file; duplicates were checked before
        zipf = zipfile.ZipFile(file, mode=mode, compression=compression, allowZip64=True)
        try:
            # Stage arrays in a temporary file on disk, before writing to zip.
            fd, tmpfile = tempfile.mkstemp(suffix='-numpy.npy')
            os.close(fd)
            try:
                for key, val in namedict.items():
                    _stage(zipf, tmpfile, key, val)
            finally:
                os.remove(tmpfile)
        finally:
            # Close the archive even when staging or writing failed.
            zipf.close()
        return

    # Update mode: build a new archive next to a temp dir, then swap it in.
    allkeys = set(namedict)
    allkeys.update(inzipf)
    tempdir = tempfile.mkdtemp()
    try:
        tempname = os.path.join(tempdir, 'new.zip')
        # open existing zip file in read mode
        zipr = zipfile.ZipFile(file, mode="r")
        try:
            # open temporary zip file in write mode
            zipw = zipfile.ZipFile(tempname, mode="w", compression=compression, allowZip64=True)
            try:
                for key in allkeys:
                    # if in namedict then write new, else extract it from zipfile
                    if key in namedict:
                        # Stage arrays in a temporary file on disk, before writing to zip.
                        fd, tmpfile = tempfile.mkstemp(suffix='-numpy.npy')
                        os.close(fd)
                        try:
                            _stage(zipw, tmpfile, key, namedict[key])
                        finally:
                            os.remove(tmpfile)
                    else:
                        fname = key + '.npy'
                        zipr.extract(fname, tempdir)
                        tmpfile = os.path.join(tempdir, fname)
                        zipw.write(tmpfile, arcname=fname)
                        os.remove(tmpfile)
            finally:
                zipw.close()
        finally:
            zipr.close()
        # Both archives are closed at this point; move new to old.
        shutil.move(tempname, file)
    finally:
        shutil.rmtree(tempdir)
Пример #30
0
def tobytes(array):
    """Serialize ``array`` into NPY-format bytes with pickling disabled."""
    with BytesIO() as buffer:
        write_array(buffer, array, allow_pickle=False)
        return buffer.getvalue()
Пример #31
0
def roundtrip(arr):
    """Write ``arr`` to an in-memory NPY stream and read it back.

    Reading is done with ``allow_pickle=True``. Returns the array that
    ``format.read_array`` reconstructs.
    """
    sink = BytesIO()
    format.write_array(sink, arr)
    serialized = sink.getvalue()
    return format.read_array(BytesIO(serialized), allow_pickle=True)
Пример #32
0
def toSparse(source, csr=False):
    """
    Convert intra-chromosomal contact matrices to sparse ones.

    Every intra-chromosomal matrix found in ``source`` is stored as one
    ``<label>.npy`` member of a zip (NPZ) archive. Chromosome labels are
    looked up in ``lib['genomeInformation']['idx2label']``. The
    ``resolution`` and ``genomeInformation`` entries are copied into the
    archive as well.

    Parameters
    ----------
    source : str
         Hdf5 file name. The archive name is ``source`` with ``.hm``
         replaced by ``-sparse.npz`` (``-csrsparse.npz`` when ``csr`` is
         true).

    csr : bool
        Whether to use CSR (Compressed Row Storage) format or not.

    """
    import zipfile, tempfile
    from numpy.lib.format import write_array
    from scipy import sparse

    lib = h5dict(source, mode='r')

    ## Uniform numpy-structured-array format
    # The builtin int/float are exactly what the np.int/np.float aliases
    # referred to; the aliases themselves were removed from NumPy.
    itype = np.dtype({
        'names': ['bin1', 'bin2', 'IF'],
        'formats': [int, int, float]
    })

    ## Create a Zip file in NPZ case
    if not csr:
        output = source.replace('.hm', '-sparse.npz')
    else:
        output = source.replace('.hm', '-csrsparse.npz')

    # One scratch file is reused to stage every array before it is zipped.
    fd, tmpfile = tempfile.mkstemp(suffix='-numpy.npy')
    os.close(fd)

    def _store(archive, fname, array):
        # Stage ``array`` as an NPY file on disk, then add it to ``archive``.
        with open(tmpfile, 'wb') as fid:
            write_array(fid, np.asanyarray(array))
        archive.write(tmpfile, arcname=fname)

    try:
        # The context manager closes the archive even if a conversion fails.
        with zipfile.ZipFile(output, mode='w', allowZip64=True) as Zip:
            log.log(21, 'Sparse Matrices will be saved to %s', output)
            log.log(21, 'Only intra-chromosomal matrices will be taken into account')
            log.log(21, 'Coverting ...')

            count = 0

            for i in lib:
                if i in ('resolution', 'genomeInformation'):
                    continue
                # Keys whose whitespace-separated parts are all identical
                # are the ones treated as intra-chromosomal.
                if len(set(i.split())) != 1:
                    continue

                # Used for the dict-like key
                key = lib['genomeInformation']['idx2label'][int(i.split()[0])]

                log.log(21, 'Chromosome %s ...', key)
                # 2D-Matrix
                H = lib[i]

                if not csr:
                    # Triangle Array
                    Triu = np.triu(H)
                    # Sparse Matrix in Memory
                    x, y = np.nonzero(Triu)
                    values = Triu[x, y]
                    temp = np.zeros(values.size, dtype=itype)
                    temp['bin1'] = x
                    temp['bin2'] = y
                    temp['IF'] = values
                else:
                    # NOTE(review): np.asanyarray on a scipy sparse matrix
                    # presumably yields a 0-d object array, i.e. a pickled
                    # member -- confirm readers load with allow_pickle=True.
                    temp = sparse.triu(H, format='csr')

                _store(Zip, key + '.npy', temp)

                log.log(21, 'Done!')

                count += 1

            if count == 0:
                log.warning('Empty source file!')

            # Other information
            for i in ['resolution', 'genomeInformation']:
                _store(Zip, '.'.join([i, 'npy']), lib[i])
    finally:
        # Remove the scratch file on success and on failure alike.
        os.remove(tmpfile)
Пример #33
0
def to_string(arr):
    """Return ``arr`` serialized in NPY format.

    The result is the raw content of the in-memory buffer: ``bytes`` on
    Python 3 (``str`` on Python 2).
    """
    # write_array emits bytes, which a text-mode io.StringIO rejects on
    # Python 3; io.BytesIO accepts them on both Python 2 and Python 3.
    from io import BytesIO

    f = BytesIO()
    format.write_array(f, arr)
    return f.getvalue()
Пример #34
0
def roundtrip_randsize(arr):
    """Serialize ``arr`` to NPY and read it back through ``BytesIOSRandomSize``.

    ``BytesIOSRandomSize`` is defined outside this block; presumably it is a
    ``BytesIO`` variant whose reads return a random number of bytes -- verify
    against its definition.
    """
    written = BytesIO()
    format.write_array(written, arr)
    reader = BytesIOSRandomSize(written.getvalue())
    return format.read_array(reader)
Пример #35
0
def roundtrip(arr):
    """Return the array obtained by writing ``arr`` as NPY and reading it back."""
    out_stream = BytesIO()
    format.write_array(out_stream, arr)
    in_stream = BytesIO(out_stream.getvalue())
    return format.read_array(in_stream)
Пример #36
0
def toSparse(source, idx2label, csr = False):
    """
    Convert intra-chromosomal contact matrices to sparse ones.

    Every intra-chromosomal matrix found in ``source`` is stored as one
    ``<label>.npy`` member of a zip (NPZ) archive. If ``source`` has a
    ``resolution`` entry it is copied into the archive as well.

    Parameters
    ----------
    source : str
         Hdf5 file name. The archive name is ``source`` with ``.hm``
         replaced by ``-sparse.npz`` (``-csrsparse.npz`` when ``csr`` is
         true).

    idx2label : dict
        A dictionary for conversion between zero-based indices and
        string chromosome labels.

    csr : bool
        Whether to use CSR (Compressed Row Storage) format or not.

    """
    import zipfile, tempfile
    from numpy.lib.format import write_array
    from scipy import sparse

    lib = h5dict(source, mode = 'r')

    ## Uniform numpy-structured-array format
    # The builtin int/float are exactly what the np.int/np.float aliases
    # referred to; the aliases themselves were removed from NumPy.
    itype = np.dtype({'names':['bin1', 'bin2', 'IF'],
                          'formats':[int, int, float]})

    ## Create a Zip file in NPZ case
    if not csr:
        output = source.replace('.hm', '-sparse.npz')
    else:
        output = source.replace('.hm', '-csrsparse.npz')

    # One scratch file is reused to stage every array before it is zipped.
    fd, tmpfile = tempfile.mkstemp(suffix = '-numpy.npy')
    os.close(fd)

    def _store(archive, fname, array):
        # Stage ``array`` as an NPY file on disk, then add it to ``archive``.
        with open(tmpfile, 'wb') as fid:
            write_array(fid, np.asanyarray(array))
        archive.write(tmpfile, arcname = fname)

    try:
        # The context manager closes the archive even if a conversion fails.
        with zipfile.ZipFile(output, mode = 'w', allowZip64 = True) as Zip:
            log.log(21, 'Sparse Matrices will be saved to %s', output)
            log.log(21, 'Only intra-chromosomal matrices will be taken into account')
            log.log(21, 'Coverting ...')

            count = 0

            for i in lib:
                # Keys whose whitespace-separated parts are all identical
                # are the ones treated as intra-chromosomal.
                if (i == 'resolution') or (len(set(i.split())) != 1):
                    continue

                # Used for the dict-like key
                key = idx2label[int(i.split()[0])]

                log.log(21, 'Chromosome %s ...', key)
                # 2D-Matrix
                H = lib[i]

                if not csr:
                    # Triangle Array
                    Triu = np.triu(H)
                    # Sparse Matrix in Memory
                    x, y = np.nonzero(Triu)
                    values = Triu[x, y]
                    temp = np.zeros(values.size, dtype = itype)
                    temp['bin1'] = x
                    temp['bin2'] = y
                    temp['IF'] = values
                else:
                    # NOTE(review): np.asanyarray on a scipy sparse matrix
                    # presumably yields a 0-d object array, i.e. a pickled
                    # member -- confirm readers load with allow_pickle=True.
                    temp = sparse.triu(H, format = 'csr')

                _store(Zip, key + '.npy', temp)

                log.log(21, 'Done!')

                count += 1

            # Store the resolution information
            if 'resolution' in lib:
                _store(Zip, 'resolution.npy', lib['resolution'])

            if count == 0:
                log.warning('Empty source file!')
    finally:
        # Remove the scratch file on success and on failure alike.
        os.remove(tmpfile)
Пример #37
0
	def toString(data):
		"""Return ``data`` serialized in NPY format (bytes on Python 3)."""
		# write_array emits bytes, which a text-mode io.StringIO rejects on
		# Python 3; io.BytesIO accepts them on both Python 2 and Python 3.
		from io import BytesIO

		f = BytesIO()
		format.write_array(f, data)
		return f.getvalue()
Пример #38
0
 def __init__(self, datasets, chroms=['#','X'], maxsize=4000000, npzpre=None, cache=None):
     """
     Index the input datasets and cache one pickled ``Chrom`` per entry.

     Parameters
     ----------
     datasets : dict
         Nested mapping ``datasets[res][rep] -> path``. A path ending in
         ``.npz`` is opened with ``np.load``; any other path is handed to
         ``self._scanFolder``. ``res`` is used as an integer resolution.

     chroms : iterable of str
         Labels to keep when reading ``.npz`` inputs. ``'#'`` selects
         every label made only of digits; an empty collection keeps all
         labels.

     maxsize : int
         Forwarded unchanged to ``Chrom``.

     npzpre : str or None
         When given, the data of each (resolution, replicate) pair is
         also saved to ``<npzpre>.<res//1000>K.<rep>.npz``. The process
         exits with status 1 if any of these files already exists.

     cache : str or None
         Directory for the pickled ``Chrom`` objects; it is created when
         missing. Defaults to the system temporary directory.

     """
     self.chroms = set(chroms)
     data = datasets

     self._npzpre = npzpre
     if self._npzpre is not None:
         self._npzpre = os.path.abspath(os.path.expanduser(npzpre))
         for res in data:
             for rep in data[res]:
                 rl = '%dK' % (res//1000)
                 output = '.'.join([self._npzpre, rl, rep, 'npz'])
                 if os.path.exists(output):
                     log.error('The destination npz file will be overriden, reset npz prefix and run again ...')
                     log.error('Exit ...')
                     sys.exit(1)

     # We don't read data in memory at this point.
     # We only construct the mapping for loading convenience
     self.data = {}
     for res in data:
         for rep in data[res]:
             if data[res][rep].endswith('.npz'):
                 lib = np.load(data[res][rep])
                 for i in lib.files:
                     if ((not self.chroms) or (i.isdigit() and '#' in self.chroms)
                        or (i in self.chroms)):
                         self.data.setdefault(i, {}).setdefault(res, {})[rep] = lib
             else:
                 Map = self._scanFolder(data[res][rep])
                 for i in Map:
                     self.data.setdefault(i, {}).setdefault(res, {})[rep] = Map[i]

     if cache is None:
         self._cache = tempfile.gettempdir()
     else:
         self._cache = os.path.abspath(os.path.expanduser(cache))
         # Check and create the expanded path; using the raw argument would
         # create a literal '~' directory for inputs such as '~/cache'.
         if not os.path.isdir(self._cache):
             os.makedirs(self._cache)

     # The builtin int/float are exactly what the np.int/np.float aliases
     # referred to; the aliases themselves were removed from NumPy.
     self._intertype = np.dtype({'names':['bin1', 'bin2', 'IF'],
                                 'formats':[int, int, float]})

     # Before starting calling, we make cache data under the cache folder
     for chrom in self.data:
         log.debug('Chrom %s:', chrom)
         ms = self.data[chrom]
         for res in ms:
             for rep in ms[res]:
                 log.debug('  resolution: %d, %s', res, rep)
                 entry = ms[res][rep]
                 if isinstance(entry, str):
                     # A string entry is a text file path to parse.
                     tdata = np.loadtxt(entry, dtype = self._intertype)
                 else:
                     # Otherwise it is the loaded npz archive, keyed by label.
                     tdata = entry[chrom]
                 work = Chrom(chrom, res, tdata, rep, maxsize)
                 work.Label = rep
                 tl = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))
                 kw = {'suffix':tl, 'dir':self._cache}
                 fd, tmpfil = tempfile.mkstemp(**kw)
                 os.close(fd)
                 with open(tmpfil, 'wb') as output:
                     log.debug('  Cache Chrom object into %s ...', tmpfil)
                     cPickle.dump(work, output, protocol = 2)
                 # From here on the mapping points at the cached pickle.
                 self.data[chrom][res][rep] = tmpfil

                 # NOTE(review): presumably spaces out the timestamp-based
                 # suffixes; mkstemp already returns unique names -- confirm
                 # whether this delay is still needed.
                 time.sleep(3)

                 if self._npzpre is not None:
                     rl = '%dK' % (res//1000)
                     output = '.'.join([self._npzpre, rl, rep, 'npz'])
                     tl = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))
                     fd, tmpfile = tempfile.mkstemp(suffix = '.'.join([tl, 'npy']))
                     os.close(fd)
                     log.debug('  Save data into %s for accelerating IO next time ...', output)
                     fname = '.'.join([chrom, 'npy'])
                     try:
                         # Stage the array on disk before adding it to the zip.
                         with open(tmpfile, 'wb') as fid:
                             write_array(fid, tdata)
                         # Append when an earlier chromosome created the archive.
                         mode = 'a' if os.path.exists(output) else 'w'
                         Zip = zipfile.ZipFile(output, mode = mode, allowZip64 = True)
                         try:
                             Zip.write(tmpfile, arcname = fname)
                         finally:
                             Zip.close()
                     finally:
                         # Remove the scratch file on success and on failure.
                         os.remove(tmpfile)
Пример #39
0
def roundtrip(arr):
    """Serialize ``arr`` to NPY in memory, then deserialize it.

    The read side passes ``allow_pickle=True`` to ``format.read_array``.
    """
    writer = BytesIO()
    format.write_array(writer, arr)
    reader = BytesIO(writer.getvalue())
    restored = format.read_array(reader, allow_pickle=True)
    return restored
Пример #40
0
def test_write_version():
    """Check which ``version`` arguments ``format.write_array`` accepts."""
    stream = BytesIO()
    sample = np.arange(1)

    # Each of these versions must be accepted, and a default-version write
    # right after it must succeed too.
    for accepted in ((1, 0), None, (2, 0)):
        format.write_array(stream, sample, version=accepted)
        format.write_array(stream, sample)

    # Every one of these versions must be rejected with a ValueError.
    for rejected in ((1, 1), (0, 0), (0, 1), (2, 2), (255, 255)):
        with assert_raises_regex(ValueError,
                                 'we only support format version.*'):
            format.write_array(stream, sample, version=rejected)
Пример #41
0
async def _get_record_data(
        record: ds.Record,
        accept: Optional[str],
        realization_index: Optional[int] = None) -> Response:
    """Build the payload for ``record`` according to the ``accept`` value.

    For ``f64_matrix`` records the matrix content (or the single entry
    selected by ``realization_index``) is returned as an NPY-encoded
    ``Response`` for ``application/x-numpy``, as a CSV ``Response`` for
    ``text/csv``, and as the raw content for any other ``accept``.

    For ``file`` records the result is a ``Response`` carrying the inline
    content, or a ``StreamingResponse`` fed from the Azure blob when both
    container and blob are set. Both are sent as attachments.

    Raises:
        NotImplementedError: if none of the cases above applies.
    """
    record_type = record.record_info.record_type

    if record_type == ds.RecordType.f64_matrix:
        content = record.f64_matrix.content
        if realization_index is not None:
            content = content[realization_index]

        if accept == "application/x-numpy":
            from numpy.lib.format import write_array

            buffer = io.BytesIO()
            write_array(buffer, np.array(content))
            return Response(
                content=buffer.getvalue(),
                media_type="application/x-numpy",
            )

        # Anything other than CSV gets the raw content back.
        if accept != "text/csv":
            return content

        frame = pd.DataFrame(content)
        labels = record.f64_matrix.labels
        if labels is not None:
            if realization_index is None:
                frame.columns = labels[0]
                frame.index = labels[1]
            else:
                # Rows of the output are realizations. `content` is a 1d
                # list here, so every element would become its own row;
                # transposing puts all of the data on a single row.
                frame = frame.T
                frame.columns = labels[0]
                frame.index = [realization_index]

        return Response(
            content=frame.to_csv().encode(),
            media_type="text/csv",
        )

    if record_type == ds.RecordType.file:
        stored = record.file

        if stored.content is not None:
            return Response(
                content=stored.content,
                media_type=stored.mimetype,
                headers={
                    "Content-Disposition":
                    f'attachment; filename="{stored.filename}"'
                },
            )

        if stored.az_container is not None and stored.az_blob is not None:
            blob = azure_blob_container.get_blob_client(stored.az_blob)
            download = await blob.download_blob()

            async def chunk_generator() -> AsyncGenerator[bytes, None]:
                async for chunk in download.chunks():
                    yield chunk

            return StreamingResponse(
                chunk_generator(),
                media_type=stored.mimetype,
                headers={
                    "Content-Disposition":
                    f'attachment; filename="{stored.filename}"'
                },
            )

    raise NotImplementedError(
        f"Getting record data for type {record_type} and Accept header {accept} not implemented"
    )
Пример #42
0
 def toString(self, data):
     """Return ``data`` serialized in NPY format (bytes on Python 3)."""
     # write_array emits bytes, which a text-mode io.StringIO rejects on
     # Python 3; io.BytesIO accepts them on both Python 2 and Python 3.
     from io import BytesIO

     f = BytesIO()
     format.write_array(f, data)
     return f.getvalue()