Exemplo n.º 1
0
    def as_array(self, fields=None):
        """Read the whole data stream into one numpy array.

        fields -- description of the fields to read; it is passed to
                  self.read() and to np.dtype() to build the dtype of the
                  result. Defaults to self.fields.

        Returns an array of self.numlines records.
        """
        selected = self.fields if fields is None else fields

        # csv file is assumed to be in the correct order (ie by period then id)
        rows = self.read(selected)
        record_type = np.dtype(selected)
        return fromiter(rows, dtype=record_type, count=self.numlines)
Exemplo n.º 2
0
def stream_to_table(h5file,
                    node,
                    name,
                    fields,
                    datastream,
                    numlines=None,
                    title=None,
                    invert=(),
                    buffersize=10 * 2**20,
                    compression=None):
    """Store a stream of rows in a new table of an HDF5 file, buffer by buffer.

    h5file      -- file object on which create_table() is called
    node, name  -- where to create the table and how to name it (passed as-is
                   to create_table)
    fields      -- description of the columns, anything np.dtype() accepts
    datastream  -- iterable yielding one item per row
    numlines    -- number of rows to load; None means "until the stream is
                   exhausted"
    title       -- passed as-is to create_table
    invert      -- names of the fields whose values must be inverted (~)
                   before being stored
    buffersize  -- approximate size, in bytes, of each in-memory buffer
    compression -- passed to compression_str2filter() to obtain the filters
                   given to create_table

    Returns the created table.

    When numlines is given and the stream yields fewer rows than that,
    np.fromiter cannot fill its buffer and raises an exception.
    """
    # Work on an iterator, never directly on a list: each islice() below must
    # resume where the previous one stopped. islice() on a list would restart
    # from its first element on every pass and the loop would never end.
    # No special case for lists is needed either, as
    # np.fromiter(islice(iter(l), max_rows)) is faster than
    # np.array(l[:max_rows])
    datastream = iter(datastream)
    msg, filters = compression_str2filter(compression)
    print(" - storing %s..." % msg)
    dtype = np.dtype(fields)
    table = h5file.create_table(node,
                                name,
                                dtype,
                                title=title,
                                filters=filters)
    # buffered load
    # Always buffer at least one row. When a single row is larger than
    # buffersize, the division yields 0: with numlines set the loop would then
    # never make progress, and without it the first (empty) buffer would end
    # the load before anything is stored.
    max_buffer_rows = max(1, buffersize // dtype.itemsize)
    while True:
        dataslice = islice(datastream, max_buffer_rows)
        if numlines is not None:
            if numlines <= 0:
                break
            buffer_rows = min(numlines, max_buffer_rows)
            # ideally, we should preallocate an empty buffer and reuse it,
            # but that does not seem to be supported by numpy
            array = fromiter(dataslice, dtype=dtype, count=buffer_rows)
            numlines -= buffer_rows
        else:
            # unknown length: an empty buffer means the stream is exhausted
            array = fromiter(dataslice, dtype=dtype)
            if not len(array):
                break

        for field in invert:
            array[field] = ~array[field]
        table.append(array)
        table.flush()

    return table
Exemplo n.º 3
0
def stream_to_table(
    h5file,
    node,
    name,
    fields,
    datastream,
    numlines=None,
    title=None,
    invert=(),
    buffersize=10 * 2 ** 20,
    compression=None,
):
    """Store a stream of rows in a new table of an HDF5 file, buffer by buffer.

    h5file      -- file object on which createTable() is called
    node, name  -- where to create the table and how to name it (passed as-is
                   to createTable)
    fields      -- description of the columns, anything np.dtype() accepts
    datastream  -- iterable yielding one item per row
    numlines    -- number of rows to load; None means "until the stream is
                   exhausted"
    title       -- passed as-is to createTable
    invert      -- names of the fields whose values must be inverted (~)
                   before being stored
    buffersize  -- approximate size, in bytes, of each in-memory buffer
    compression -- passed to compression_str2filter() to obtain the filters
                   given to createTable

    Returns the created table.

    When numlines is given and the stream yields fewer rows than that,
    np.fromiter cannot fill its buffer and raises an exception.
    """
    # Work on an iterator, never directly on a list: each islice() below must
    # resume where the previous one stopped. islice() on a list would restart
    # from its first element on every pass and the loop would never end.
    # No special case for lists is needed either, as
    # np.fromiter(islice(iter(l), max_rows)) is faster than
    # np.array(l[:max_rows])
    datastream = iter(datastream)
    msg, filters = compression_str2filter(compression)
    # single parenthesized argument: prints the same text whether print is a
    # statement or a function
    print(" - storing %s..." % msg)
    dtype = np.dtype(fields)
    table = h5file.createTable(node, name, dtype, title=title, filters=filters)
    # buffered load
    # Floor division keeps the row count an integer (islice rejects floats)
    # even when true division is in effect. At least one row is always
    # buffered: when a single row is larger than buffersize the division
    # yields 0, in which case the loop would never make progress with
    # numlines set, and would stop before storing anything without it.
    max_buffer_rows = max(1, buffersize // dtype.itemsize)
    while True:
        dataslice = islice(datastream, max_buffer_rows)
        if numlines is not None:
            if numlines <= 0:
                break
            buffer_rows = min(numlines, max_buffer_rows)
            # ideally, we should preallocate an empty buffer and reuse it,
            # but that does not seem to be supported by numpy
            array = fromiter(dataslice, dtype=dtype, count=buffer_rows)
            numlines -= buffer_rows
        else:
            # unknown length: an empty buffer means the stream is exhausted
            array = fromiter(dataslice, dtype=dtype)
            if not len(array):
                break

        for field in invert:
            array[field] = ~array[field]
        table.append(array)
        table.flush()

    return table
Exemplo n.º 4
0
    def as_array(self, fields=None):
        """Return everything self.read() yields as a single numpy array.

        fields -- description of the fields to load, used both for reading
                  and as the dtype of the result (via np.dtype). When omitted,
                  self.fields is used.

        The result holds self.numlines records.
        """
        wanted = fields
        if wanted is None:
            wanted = self.fields

        # csv file is assumed to be in the correct order (ie by period then id)
        return fromiter(self.read(wanted),
                        dtype=np.dtype(wanted),
                        count=self.numlines)
Exemplo n.º 5
0
def stream_to_array(fields, datastream, numlines=None, invert=()):
    """Build a numpy array from a stream of rows.

    fields     -- description of the columns, anything np.dtype() accepts
    datastream -- iterable yielding one item per row
    numlines   -- number of rows to read; None means "read the whole stream"
    invert     -- names of the fields whose values must be inverted (~)

    Returns the resulting array, with the requested fields inverted.
    """
    # lists and generators are handled alike: everything goes through an
    # iterator and np.fromiter
    rows = iter(datastream)
    record_type = np.dtype(fields)
    if numlines is None:
        # -1 tells fromiter to consume the iterator until it is exhausted
        num_rows = -1
    else:
        num_rows = numlines
    result = fromiter(rows, dtype=record_type, count=num_rows)
    for column in invert:
        result[column] = ~result[column]
    return result
Exemplo n.º 6
0
def stream_to_array(fields, datastream, numlines=None, invert=()):
    """Load a stream of rows into a numpy array of dtype np.dtype(fields).

    fields     -- description of the columns, anything np.dtype() accepts
    datastream -- iterable yielding one item per row
    numlines   -- how many rows to read; when None, the stream is read until
                  it is exhausted
    invert     -- names of the fields to invert (~) in the returned array
    """
    source = iter(datastream)
    # fromiter uses count=-1 to mean "no limit"
    limit = numlines if numlines is not None else -1
    loaded = fromiter(source, dtype=np.dtype(fields), count=limit)
    for field_name in invert:
        loaded[field_name] = np.invert(loaded[field_name])
    return loaded