示例#1
0
    def __call__(self, data):
        """
        Print type information for data and its columns.

        The running item counter is incremented and printed together with
        the type name of data. For every column (or only the columns listed
        in self.cols) the type name is printed. Values for which istensor()
        is true additionally show shape, dtype and value range; all other
        values are printed via str(). Columns of objects with a ``_fields``
        attribute (e.g. named tuples) are labelled by field name, all
        others by column index.

        :param any data: Any type of iterable
        :return: data unchanged
        :rtype: same as data
        """
        columns = as_tuple(data)
        selected = None if self.cols is None else as_tuple(self.cols)
        if hasattr(data, '_fields'):
            labels = data._fields
        else:
            labels = [str(idx) for idx in range(len(columns))]

        self.cnt += 1
        print('item {}: <{}>'.format(self.cnt, type(data).__name__))
        for idx, elem in enumerate(columns):
            if selected is not None and idx not in selected:
                continue
            typename = type(elem).__name__
            print('  {}: <{}>'.format(labels[idx], typename), end=' ')
            if istensor(elem):
                details = 'shape:{} dtype:{} range:{}..{}'.format(
                    shapestr(elem), elem.dtype, elem.min(), elem.max())
            else:
                details = '{}'.format(str(elem))
            print(details)
        return data
示例#2
0
def RandomImagePatches(iterable, imagecols, pshape, npatches):
    """
    samples >> RandomImagePatches(imagecols, pshape, npatches)

    Extract patches at random locations from images.

    >>> import numpy as np
    >>> np.random.seed(0)    # just to ensure stable doctest
    >>> img = np.reshape(np.arange(30), (5, 6))
    >>> samples = [(img, 0)]
    >>> getpatches = RandomImagePatches(0, (2, 3), 3)
    >>> for (p, l) in samples >> getpatches:
    ...     print(p.tolist(), l)
    [[7, 8, 9], [13, 14, 15]] 0
    [[8, 9, 10], [14, 15, 16]] 0
    [[8, 9, 10], [14, 15, 16]] 0

    :param iterable iterable: Samples with images
    :param int|tuple imagecols: Indices of sample columns that contain
      images, where patches are extracted from.
      Images must be numpy arrays of shape h,w,c or h,w.
      The random locations are derived from the size of the image in the
      first of these columns and reused for all image columns.
    :param tuple pshape: Shape of patch (h,w)
    :param int npatches: Number of patches to extract (per image)
    :return: Iterator over samples where images are replaced by patches.
    :rtype: generator
    """
    imagecols = as_tuple(imagecols)
    for sample in iterable:
        height, width = sample[imagecols[0]].shape[:2]
        # Keep locations at least half a patch (+1) away from the border.
        margin_r = pshape[0] // 2 + 1
        margin_c = pshape[1] // 2 + 1
        # Rows are drawn before columns; the order matters for seeded runs.
        rows = np.random.randint(margin_r, height - margin_r, npatches)
        columns = np.random.randint(margin_c, width - margin_c, npatches)
        for r, c in zip(rows, columns):
            yield ut.col_map(sample, imagecols, ni.extract_patch, pshape, r, c)
示例#3
0
def FilterCol(iterable, columns, func):
    """
    iterable >> FilterCol(columns, func)

    Filter elements from iterable based on predicate function and
    specified column(s).

    >>> is_even = lambda n: n % 2 == 0
    >>> [(0, 'e'), (1, 'o'), (2, 'e')] >> FilterCol(0, is_even) >> Collect()
    [(0, 'e'), (2, 'e')]


    :param iterable iterable: Any iterable
    :param int|tuple columns: Column or columns to extract from each
        element before passing it on to the predicate function.
        A single column (given as index or as one-element tuple/list) is
        passed as the plain column value; several columns are passed as a
        list of values in the order they occur within the element.
    :param function func: Predicate function. Element is removed if False.
    :return: Filtered iterable
    :rtype: Iterator
    """
    cols = as_tuple(columns)
    if len(cols) == 1:
        # Index with the normalized column, not the raw argument, so that
        # a one-element tuple/list such as (0,) works on tuples and lists.
        col = cols[0]
        extract = lambda es: es[col]
    else:
        extract = lambda es: [es[i] for i, e in enumerate(es) if i in cols]
    for es in iterable:
        if func(extract(es)):
            yield es
示例#4
0
def Append(iterable, items):
    """
    iterable >> Append(items)

    Append item(s) to lists/tuples in iterable.

    >>> [(1, 2), (3, 4)] >> Append('X') >> Collect()
    [(1, 2, 'X'), (3, 4, 'X')]

    >>> items = ['a', 'b']
    >>> [(1, 2), (3, 4)] >> Append(items) >> Collect()
    [(1, 2, 'a'), (3, 4, 'b')]

    >>> items = [('a', 'b'), ('c', 'd')]
    >>> [(1, 2), (3, 4)] >> Append(items) >> Collect()
    [(1, 2, 'a', 'b'), (3, 4, 'c', 'd')]

    >>> from nutsflow import Enumerate
    >>> [(1, 2), (3, 4)] >> Append(Enumerate()) >> Collect()
    [(1, 2, 0), (3, 4, 1)]

    :param iterable iterable: Any iterable over tuples or lists
    :param iterable|object items: A single object or an iterable over objects.
        A single object is appended to every element; an iterable is
        consumed in lockstep with the input, so the output ends with the
        shorter of the two.
    :return: iterator where items are appended to the iterable elements.
    :rtype: iterator over tuples
    """
    if not is_iterable(items):
        # A single object is repeated for every element.
        items = itt.repeat(items)
    for elem, item in zip(iterable, items):
        extended = tuple(elem) + as_tuple(item)
        yield extended
示例#5
0
    def __init__(self,
                 filepath,
                 cols=None,
                 skipheader=0,
                 flush=False,
                 encoding=None,
                 fmtfunc=lambda x: x,
                 **kwargs):
        """
        WriteCSV(filepath, cols, skipheader, flush, encoding, fmtfunc,
                 **kwargs)

        Write data in Comma Separated Values format (CSV) and other formats
        to file. Tab Separated Values (TSV) files can be written by
        specifying a different delimiter. Note that in the docstring below
        delimiter is '\\t' but in code it should be '\t'. See unit tests.

        Also see https://docs.python.org/2/library/csv.html
        and ReadCSV.


        >>> import os
        >>> filepath = 'tests/data/temp_out.csv'
        >>> with WriteCSV(filepath) as writer:
        ...     range(10) >> writer
        >>> os.remove(filepath)

        >>> with WriteCSV(filepath, cols=(1,0)) as writer:
        ...     [(1,2), (3,4)] >> writer
        >>> os.remove(filepath)

        >>> filepath = 'tests/data/temp_out.tsv'
        >>> with WriteCSV(filepath, delimiter='\\t') as writer:
        ...     [[1,2], [3,4]] >> writer
        >>> os.remove(filepath)


        :param string filepath: Path to file in CSV format.
                                The file is opened for writing right away.
        :param tuple cols: Indices of the columns to write.
                           If None all columns are written.
        :param int skipheader: Number of header rows to skip.
        :param bool flush: If True flush after every line written.
        :param str encoding: Character encoding, e.g. "utf-8"
                             Ignored for Python 2.x!
        :param function fmtfunc: Function to apply to the elements of each row.
        :param kwargs kwargs: Keyword arguments for Python's CSV writer.
                              See https://docs.python.org/2/library/csv.html
        """
        if sys.version_info >= (3, 0):
            self.csvfile = open(filepath, 'w', encoding=encoding)
        else:
            # No encoding argument is passed to open() under Python 2.
            self.csvfile = open(filepath, 'w')
        self.columns = None if cols is None else as_tuple(cols)
        self.skipheader = skipheader
        self.fmtfunc = fmtfunc
        self.flush = flush
        self.writer = csv.writer(self.csvfile, lineterminator='\n', **kwargs)
示例#6
0
    def __call__(self, data):
        """
        Print type information for data and its columns.

        The running item counter is incremented and printed together with
        the type name of data. For every column (or only the columns listed
        in self.cols) the column index and type name are printed. Numpy
        arrays additionally show shape, dtype and value range; all other
        values are printed via str().

        :param any data: Any type of iterable
        :return: data unchanged
        :rtype: same as data
        """
        selected = None if self.cols is None else as_tuple(self.cols)
        self.cnt += 1
        print('item {}: <{}>'.format(self.cnt, type(data).__name__))
        for idx, elem in enumerate(as_tuple(data)):
            if selected is not None and idx not in selected:
                continue
            print('  {}: <{}>'.format(idx, type(elem).__name__), end=' ')
            if not isinstance(elem, np.ndarray):
                print('{}'.format(str(elem)))
                continue
            template = 'shape:{} dtype:{} range:{}..{}'
            print(template.format(
                shapestr(elem), elem.dtype, np.min(elem), np.max(elem)))
        return data
示例#7
0
    def __init__(self,
                 imgcols,
                 layout=(1, None),
                 figsize=None,
                 pause=0.0001,
                 **imargs):
        """
        iterable >> ViewImage(imgcols, layout=(1, None), figsize=None,
                              pause=0.0001, **imargs)

        |  Images should be numpy arrays in one of the following formats:
        |  MxN - luminance (grayscale, float array only)
        |  MxNx3 - RGB (float or uint8 array)
        |  MxNx4 - RGBA (float or uint8 array)

        Shapes with single-dimension axis are supported but not encouraged,
        e.g. MxNx1 will be converted to MxN.

        See
        http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.imshow

        >>> from nutsflow import Consume
        >>> from nutsml import ReadImage
        >>> imagepath = 'tests/data/img_formats/*.jpg'
        >>> samples = [(1, 'nut_color'), (2, 'nut_grayscale')]
        >>> read_image = ReadImage(1, imagepath)
        >>> samples >> read_image >> ViewImage(1) >> Consume() # doctest: +SKIP

        :param int|tuple imgcols: Index or tuple of indices of data columns
               containing images (ndarray)
        :param tuple layout: Rows and columns of the viewer layout, e.g.
               a layout of (2,3) means that 6 images in the data are
               arranged in 2 rows and 3 columns.
               Number of cols can be None and is then derived from imgcols.
        :param tuple figsize: Figure size in inch.
        :param float pause: Waiting time in seconds after each plot.
               Pressing a key skips the waiting time.
        :param kwargs imargs: Keyword arguments passed on to matplotlib's
            imshow() function. See
            http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.imshow
        :raise ValueError: If the number of image columns does not equal
            rows * columns of the layout.
        """
        imgcols = as_tuple(imgcols)
        r, c, n = layout[0], layout[1], len(imgcols)
        if c is None:
            c = n
        # Validate before creating the figure so no window is opened
        # for an invalid layout.
        if n != r * c:
            raise ValueError("Number of images and layout don't match!")

        fig = plt.figure(figsize=figsize)
        # FigureCanvas.set_window_title() was deprecated in Matplotlib 3.4
        # and removed later; the figure manager offers the same call in old
        # and new versions. Skip the title if the canvas has no manager.
        manager = getattr(fig.canvas, 'manager', None)
        if manager is not None:
            manager.set_window_title('ViewImage')
        self.axes = [fig.add_subplot(r, c, i + 1) for i in range(n)]
        self.imgcols = imgcols
        self.pause = pause
        self.imargs = imargs
示例#8
0
    def __call__(self, x):
        """
        Log x

        Numpy arrays are converted to (nested) lists first; zero-dimensional
        arrays become a one-element list. If self.cols is set, only those
        columns of the row are written, in the given order.

        :param any x: Any type of data.
                      Special support for numpy arrays.
        :return: Return input unchanged
        :rtype: Same as input
        """
        if not isinstance(x, np.ndarray):
            row = x
        elif x.ndim:
            row = x.tolist()
        else:
            # A 0-d array cannot be indexed; wrap its scalar in a list.
            row = [x.item()]
        if self.cols is not None:
            row = [row[idx] for idx in as_tuple(self.cols)]
        self._writerow(row)
        return x
示例#9
0
def FlattenCol(iterable, cols):
    """
    iterable >> FlattenCol(cols)

    Flattens the specified columns of the tuples/iterables within the iterable.
    Only one level is flattened.

    (1 3)  (5 7)
    (2 4)  (6 8)   >> FlattenCol((0,1)) >>   (1 3)  (2 4)  (5 7)  (6 8)

    If a column contains a single element (instead of an iterable) it is
    wrapped into a repeater. This allows to flatten columns that are iterable
    together with non-iterable columns, e.g.

    (1 3)  (6 7)
    (2  )  (  8)   >> FlattenCol((0,1)) >>   (1 3)  (2 3)  (6 7)  (6 8)

    NOTE: If none of the selected columns of an element is iterable, all
    of them are endless repeaters and the flattening of that element never
    terminates.

    >>> from nutsflow import Collect
    >>> data = [([1, 2], [3, 4]), ([5, 6], [7, 8])]
    >>> data >> FlattenCol(0) >> Collect()
    [(1,), (2,), (5,), (6,)]

    >>> data >> FlattenCol((0, 1)) >> Collect()
    [(1, 3), (2, 4), (5, 7), (6, 8)]

    >>> data >> FlattenCol((1, 0)) >> Collect()
    [(3, 1), (4, 2), (7, 5), (8, 6)]

    >>> data >> FlattenCol((1, 1, 0)) >> Collect()
    [(3, 3, 1), (4, 4, 2), (7, 7, 5), (8, 8, 6)]

    >>> data = [([1, 2], 3), (6, [7, 8])]
    >>> data >> FlattenCol((0, 1)) >> Collect()
    [(1, 3), (2, 3), (6, 7), (6, 8)]

    :param iterable iterable: Any iterable.
    :param int|tuple cols: Column index or indices
    :return: Flattened columns of iterable
    :rtype: generator
    """
    cols = as_tuple(cols)

    def column_values(value):
        # Single values are repeated so they pair with every item of the
        # iterable columns.
        return value if is_iterable(value) else itt.repeat(value)

    for es in iterable:
        selected = [column_values(es[c]) for c in cols]
        for row in zip(*selected):
            yield row
示例#10
0
    def __rrshift__(self, iterable):
        """
        Apply augmentation to samples in iterable.

        For every sample each registered augmentation (name, p, ranges,
        kwargs) is tried: int(p) times if p > 1.0, otherwise once. On each
        try the augmentation is applied if a uniform random number in
        [0, 1) is smaller than p; its arguments are then drawn uniformly
        from the given ranges.

        :param iterable iterable: Samples
        :return: iterable with augmented samples
        :rtype: generator
        """
        imagecols = as_tuple(self.imagecols)
        draw = self.rand.uniform
        for sample in iterable:
            for name, p, ranges, kwargs in self.augmentations:
                if p > 1.0:
                    tries = int(p)
                else:
                    tries = 1
                for _ in range(tries):
                    if draw(0, 1) < p:
                        args = [draw(r[0], r[1]) for r in ranges]
                        yield map_transform(
                            sample, imagecols, (name, args, kwargs))
示例#11
0
def map_transform(sample, imagecols, spec):
    """
    Map transformation function on columns of sample.

    :param tuple sample: Sample with images
    :param int|tuple imagecols: Indices of sample columns the transformation
       should be applied to. Can be a single index or a tuple of indices.
    :param tuple|object spec: Transformation specification. Either a tuple
       (name, arguments, keyword arguments) of the transformation function
       or, if not a tuple, just the name of the transformation function,
       which is then called without additional arguments.
       The name is looked up in TransformImage.transformations.
    :return: Sample with transformations applied. Columns not specified
      remain unchanged.
    :rtype: tuple
    """
    colset = as_tuple(imagecols)
    if isinstance(spec, tuple):
        name, args, kwargs = spec
    else:
        name, args, kwargs = spec, [], {}
    transform = TransformImage.transformations[name]
    return tuple(
        transform(elem, *args, **kwargs) if idx in colset else elem
        for idx, elem in enumerate(sample))
示例#12
0
    def _add_data(self, data):
        """
        Add data point to data buffer.

        The x-value(s) come from self.xcols: if it has an ``__iter__``
        attribute the next value is drawn from it, if it is callable it is
        called, and otherwise it is treated as column index/indices into
        data. In the first two cases the same x-value is stored for every
        y-series. The y-values are read from the columns in self.ycols;
        a negative column index stores data as a whole.

        :param any data: Data point. Objects with an ``ndim`` attribute
            (presumably numpy arrays) are converted to lists, everything
            else is converted with as_list.
        """
        if not hasattr(data, 'ndim'):
            data = as_list(data)
        elif data.ndim:
            data = data.tolist()
        else:
            data = [data.item()]

        def append_x_everywhere(value):
            # One shared x-value for all y-series.
            for idx, _ in enumerate(self.ycols):
                self.xdata[idx].append(value)

        xcols = self.xcols
        if hasattr(xcols, '__iter__'):
            append_x_everywhere(next(xcols))
        elif hasattr(xcols, '__call__'):
            append_x_everywhere(xcols())
        else:
            for idx, xcol in enumerate(as_tuple(xcols)):
                self.xdata[idx].append(data[xcol])

        for idx, ycol in enumerate(self.ycols):
            if ycol < 0:
                value = data
            else:
                value = data[ycol]
            self.ydata[idx].append(value)
示例#13
0
    def __init__(self, imagecols):
        """
        samples >> TransformImage(imagecols)

        Images are expected to be numpy arrays of the shape (h, w, c) or (h, w)
        with a range of [0,255] and a dtype of uint8. Transformation should
        result in images with the same properties.

        >>> transform = TransformImage(0).by('resize', 10, 20)

        The object starts with an empty list of transformation
        specifications (self.transspec); as the example above suggests,
        transformations are presumably added afterwards via by().

        :param int|tuple imagecols: Indices of sample columns the transformation
            should be applied to. Can be a single index or a tuple of indices.
        """
        self.imagecols = as_tuple(imagecols)
        self.transspec = []
示例#14
0
def test_as_tuple():
    """as_tuple wraps single values and converts tuples/lists to tuples."""
    cases = [
        (1, (1,)),
        ((1, 2), (1, 2)),
        ([1, 2], (1, 2)),
    ]
    for value, expected in cases:
        assert as_tuple(value) == expected
示例#15
0
    def __init__(self,
                 filepath,
                 columns=None,
                 skipheader=0,
                 fmtfunc=None,
                 **kwargs):
        """
        ReadCSV(filepath, columns, skipheader, fmtfunc, **kwargs)

        Read data in Comma Separated Format (CSV) from file.
        See also WriteCSV.
        Can also read Tab Separated Format (TSV) by providing the
        corresponding delimiter. Note that in the docstring below
        delimiter is '\\t' but in code it should be '\t'.

        >>> from nutsflow import Collect
        >>> filepath = 'tests/data/data.csv'

        >>> with ReadCSV(filepath, skipheader=1) as reader:
        ...     reader >> Collect()
        [('1', '2', '3'), ('4', '5', '6')]

        >>> with ReadCSV(filepath, skipheader=1, fmtfunc=int) as reader:
        ...     reader >> Collect()
        [(1, 2, 3), (4, 5, 6)]

        >>> fmtfuncs=(int, str, float)
        >>> with ReadCSV(filepath, skipheader=1, fmtfunc=fmtfuncs) as reader:
        ...     reader >> Collect()
        [(1, '2', 3.0), (4, '5', 6.0)]

        >>> with ReadCSV(filepath, (2, 1), 1, int) as reader:
        ...     reader >> Collect()
        [(3, 2), (6, 5)]

        >>> with ReadCSV(filepath, (2, 1), 1, (str,int)) as reader:
        ...     reader >> Collect()
        [('3', 2), ('6', 5)]

        >>> with ReadCSV(filepath, 2, 1, int) as reader:
        ...     reader >> Collect()
        [3, 6]

        >>> filepath = 'tests/data/data.tsv'
        >>> with ReadCSV(filepath, skipheader=1, fmtfunc=int,
        ...                delimiter='\\t') as reader:
        ...     reader >> Collect()
        [(1, 2, 3), (4, 5, 6)]

        :param string filepath: Path to file in CSV format.
                              The file is opened for reading right away.
        :param tuple columns: Indices of the columns to read.
                              If None all columns are read.
        :param int skipheader: Number of header lines to skip.
        :param tuple|function fmtfunc: Function or functions to apply to the
                              column elements of each row.
        :param kwargs kwargs: Keyword arguments for Python's CSV reader.
                              See https://docs.python.org/2/library/csv.html
        :raise StopIteration: If the file has fewer than skipheader lines.
        """
        self.csvfile = open(filepath, 'r')
        try:
            self.columns = columns if columns is None else as_tuple(columns)
            self.fmtfunc = (lambda x: x) if fmtfunc is None else fmtfunc
            self.is_functions = is_iterable(self.fmtfunc)
            # Skip the header lines exactly once. A former additional
            # itf.take() call on the file discarded its result and was
            # removed.
            for _ in range(skipheader):
                next(self.csvfile)
            # Lines are stripped of surrounding whitespace before parsing.
            stripped = (r.strip() for r in self.csvfile)
            self.reader = csv.reader(stripped, **kwargs)
        except Exception:
            # Don't leak the open file handle if the setup fails.
            self.csvfile.close()
            raise