Пример #1
0
    def save(self):
        """
        Create the writer & save
        """
        # GH21227 internal compression is not used when file-like passed.
        if self.compression and hasattr(self.path_or_buf, 'write'):
            msg = ("compression has no effect when passing file-like "
                   "object as input.")
            warnings.warn(msg, RuntimeWarning, stacklevel=2)

        # when zip compression is called.
        is_zip = isinstance(self.path_or_buf, ZipFile) or (
            not hasattr(self.path_or_buf, 'write')
            and self.compression == 'zip')

        if is_zip:
            # zipfile doesn't support writing string to archive. uses string
            # buffer to receive csv writing and dump into zip compression
            # file handle. GH21241, GH21118
            f = StringIO()
            close = False
        elif hasattr(self.path_or_buf, 'write'):
            f = self.path_or_buf
            close = False
        else:
            f, handles = _get_handle(self.path_or_buf, self.mode,
                                     encoding=self.encoding,
                                     compression=self.compression)
            close = True

        try:
            writer_kwargs = dict(lineterminator=self.line_terminator,
                                 delimiter=self.sep, quoting=self.quoting,
                                 doublequote=self.doublequote,
                                 escapechar=self.escapechar,
                                 quotechar=self.quotechar)
            if self.encoding == 'ascii':
                self.writer = csvlib.writer(f, **writer_kwargs)
            else:
                writer_kwargs['encoding'] = self.encoding
                self.writer = UnicodeWriter(f, **writer_kwargs)

            self._save()

        finally:
            if is_zip:
                # GH17778 handles zip compression separately.
                buf = f.getvalue()
                if hasattr(self.path_or_buf, 'write'):
                    self.path_or_buf.write(buf)
                else:
                    f, handles = _get_handle(self.path_or_buf, self.mode,
                                             encoding=self.encoding,
                                             compression=self.compression)
                    f.write(buf)
                    close = True
            if close:
                f.close()
                for _fh in handles:
                    _fh.close()
Пример #2
0
    def save(self):
        """
        Create the writer & save
        """
        # GH21227 internal compression is not used when file-like passed.
        if self.compression and hasattr(self.path_or_buf, 'write'):
            msg = ("compression has no effect when passing file-like "
                   "object as input.")
            warnings.warn(msg, RuntimeWarning, stacklevel=2)

        # when zip compression is called.
        is_zip = isinstance(self.path_or_buf, ZipFile) or (
            not hasattr(self.path_or_buf, 'write')
            and self.compression == 'zip')

        if is_zip:
            # zipfile doesn't support writing string to archive. uses string
            # buffer to receive csv writing and dump into zip compression
            # file handle. GH21241, GH21118
            f = StringIO()
            close = False
        elif hasattr(self.path_or_buf, 'write'):
            f = self.path_or_buf
            close = False
        else:
            f, handles = _get_handle(self.path_or_buf, self.mode,
                                     encoding=self.encoding,
                                     compression=self.compression)
            close = True

        try:
            writer_kwargs = dict(lineterminator=self.line_terminator,
                                 delimiter=self.sep, quoting=self.quoting,
                                 doublequote=self.doublequote,
                                 escapechar=self.escapechar,
                                 quotechar=self.quotechar)
            if self.encoding == 'ascii':
                self.writer = csvlib.writer(f, **writer_kwargs)
            else:
                writer_kwargs['encoding'] = self.encoding
                self.writer = UnicodeWriter(f, **writer_kwargs)

            self._save()

        finally:
            if is_zip:
                # GH17778 handles zip compression separately.
                buf = f.getvalue()
                if hasattr(self.path_or_buf, 'write'):
                    self.path_or_buf.write(buf)
                else:
                    f, handles = _get_handle(self.path_or_buf, self.mode,
                                             encoding=self.encoding,
                                             compression=self.compression)
                    f.write(buf)
                    close = True
            if close:
                f.close()
                for _fh in handles:
                    _fh.close()
Пример #3
0
    def save(self):
        # create the writer & save
        if self.encoding is None:
            if compat.PY2:
                encoding = 'ascii'
            else:
                encoding = 'utf-8'
        else:
            encoding = self.encoding

        # PR 21300 uses string buffer to receive csv writing and dump into
        # file-like output with compression as option. GH 21241, 21118
        f = StringIO()
        if not is_file_like(self.path_or_buf):
            # path_or_buf is path
            path_or_buf = self.path_or_buf
        elif hasattr(self.path_or_buf, 'name'):
            # path_or_buf is file handle
            path_or_buf = self.path_or_buf.name
        else:
            # path_or_buf is file-like IO objects.
            f = self.path_or_buf
            path_or_buf = None

        try:
            writer_kwargs = dict(lineterminator=self.line_terminator,
                                 delimiter=self.sep,
                                 quoting=self.quoting,
                                 doublequote=self.doublequote,
                                 escapechar=self.escapechar,
                                 quotechar=self.quotechar)
            if encoding == 'ascii':
                self.writer = csvlib.writer(f, **writer_kwargs)
            else:
                writer_kwargs['encoding'] = encoding
                self.writer = UnicodeWriter(f, **writer_kwargs)

            self._save()

        finally:
            # GH 17778 handles zip compression for byte strings separately.
            buf = f.getvalue()
            if path_or_buf:
                f, handles = _get_handle(path_or_buf,
                                         self.mode,
                                         encoding=encoding,
                                         compression=self.compression)
                f.write(buf)
                f.close()
                for _fh in handles:
                    _fh.close()
Пример #4
0
    def save(self):
        # create the writer & save
        if self.encoding is None:
            if compat.PY2:
                encoding = 'ascii'
            else:
                encoding = 'utf-8'
        else:
            encoding = self.encoding

        # PR 21300 uses string buffer to receive csv writing and dump into
        # file-like output with compression as option. GH 21241, 21118
        f = StringIO()
        if not is_file_like(self.path_or_buf):
            # path_or_buf is path
            path_or_buf = self.path_or_buf
        elif hasattr(self.path_or_buf, 'name'):
            # path_or_buf is file handle
            path_or_buf = self.path_or_buf.name
        else:
            # path_or_buf is file-like IO objects.
            f = self.path_or_buf
            path_or_buf = None

        try:
            writer_kwargs = dict(lineterminator=self.line_terminator,
                                 delimiter=self.sep, quoting=self.quoting,
                                 doublequote=self.doublequote,
                                 escapechar=self.escapechar,
                                 quotechar=self.quotechar)
            if encoding == 'ascii':
                self.writer = csvlib.writer(f, **writer_kwargs)
            else:
                writer_kwargs['encoding'] = encoding
                self.writer = UnicodeWriter(f, **writer_kwargs)

            self._save()

        finally:
            # GH 17778 handles zip compression for byte strings separately.
            buf = f.getvalue()
            if path_or_buf:
                f, handles = _get_handle(path_or_buf, self.mode,
                                         encoding=encoding,
                                         compression=self.compression)
                f.write(buf)
                f.close()
                for _fh in handles:
                    _fh.close()
Пример #5
0
def createDocForOneMetric(dir, label_mode):
    metrics = extraktLastDirFromPath(dir)
    document = Document()
    header = document.add_heading('Report: ', 1)

    listOfTrueLabels = []
    listOfPredictedLabels = []
    listOfK = []

    for r, d, f in os.walk(dir):
        header.add_run(metrics + ' mertics').bold = True
        for file in f:
            if isCorrectFileName(file):
                pathToFileWithClassificationData = os.path.abspath(dir)

                k = extraktNumberOfK(file)
                trueLabels, predictedLabels = extractClassificationCases(pathToFileWithClassificationData + "\\" + file)
                plot_confusion_matrix(trueLabels, predictedLabels, classes=unique_labels(trueLabels, predictedLabels),
                                      normalize=True,
                                      title='Normalized confusion matrix for ' + str(k) + ' k')
                memfile = StringIO()
                plt.savefig(memfile)
                document.add_paragraph('Confusion matrix for ' + str(k) + ' k.', style='List Number')
                document.add_picture(memfile, width=Inches(5))
                memfile.close()
                plt.close()

                listOfTrueLabels.append(trueLabels)
                listOfPredictedLabels.append(predictedLabels)
                listOfK.append(k)

    if label_mode == "single":
        plotClassificationQualityForSingleLabel(listOfTrueLabels, listOfPredictedLabels, listOfK)
    elif label_mode == "multi":
        plotClassificationQualityForMultiLabels(listOfTrueLabels, listOfPredictedLabels, listOfK)
    memfile = StringIO()
    plt.savefig(memfile)
    document.add_paragraph('Classification quality char for all k.', style='List Number')
    document.add_picture(memfile, width=Inches(6))
    memfile.close()
    document.save('report_' + metrics + '.docx')
Пример #6
0
def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                memory_map=False, is_text=True):
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf :
        a path (str) or buffer
    mode : str
        mode to open path_or_buf with
    encoding : str or None
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default None
        If 'infer' and `filepath_or_buffer` is path-like, then detect
        compression from the following extensions: '.gz', '.bz2', '.zip',
        or '.xz' (otherwise no compression).
    memory_map : boolean, default False
        See parsers._parser_params for more information.
    is_text : boolean, default True
        whether file/buffer is in text format (csv, json, etc.), or in binary
        mode (pickle, etc.)

    Returns
    -------
    f : file-like
        A file-like object
    handles : list of file-like objects
        A list of file-like object that were opened in this function.
    """
    try:
        from s3fs import S3File
        need_text_wrapping = (BytesIO, S3File)
    except ImportError:
        need_text_wrapping = (BytesIO,)

    handles = list()
    f = path_or_buf

    # Convert pathlib.Path/py.path.local or string
    path_or_buf = _stringify_path(path_or_buf)
    is_path = isinstance(path_or_buf, compat.string_types)

    if is_path:
        compression = _infer_compression(path_or_buf, compression)

    if compression:

        if compat.PY2 and not is_path and encoding:
            msg = 'compression with encoding is not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            import gzip
            if is_path:
                f = gzip.open(path_or_buf, mode)
            else:
                f = gzip.GzipFile(fileobj=path_or_buf)

        # BZ Compression
        elif compression == 'bz2':
            import bz2
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
            elif compat.PY2:
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually
                f = StringIO(bz2.decompress(path_or_buf.read()))
                path_or_buf.close()
            else:
                f = bz2.BZ2File(path_or_buf)

        # ZIP Compression
        elif compression == 'zip':
            zf = BytesZipFile(path_or_buf, mode)
            # Ensure the container is closed as well.
            handles.append(zf)
            if zf.mode == 'w':
                f = zf
            elif zf.mode == 'r':
                zip_names = zf.namelist()
                if len(zip_names) == 1:
                    f = zf.open(zip_names.pop())
                elif len(zip_names) == 0:
                    raise ValueError('Zero files found in ZIP file {}'
                                     .format(path_or_buf))
                else:
                    raise ValueError('Multiple files found in ZIP file.'
                                     ' Only one file per ZIP: {}'
                                     .format(zip_names))

        # XZ Compression
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path_or_buf, mode)

        # Unrecognized Compression
        else:
            msg = 'Unrecognized compression type: {}'.format(compression)
            raise ValueError(msg)

        handles.append(f)

    elif is_path:
        if compat.PY2:
            # Python 2
            mode = "wb" if mode == "w" else mode
            f = open(path_or_buf, mode)
        elif encoding:
            # Python 3 and encoding
            f = open(path_or_buf, mode, encoding=encoding, newline="")
        elif is_text:
            # Python 3 and no explicit encoding
            f = open(path_or_buf, mode, errors='replace', newline="")
        else:
            # Python 3 and binary mode
            f = open(path_or_buf, mode)
        handles.append(f)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    if (compat.PY3 and is_text and
            (compression or isinstance(f, need_text_wrapping))):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding, newline='')
        handles.append(f)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f, handles
Пример #7
0
def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                memory_map=False, is_text=True):
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf :
        a path (str) or buffer
    mode : str
        mode to open path_or_buf with
    encoding : str or None
    compression : str or None
        Supported compression protocols are gzip, bz2, zip, and xz
    memory_map : boolean, default False
        See parsers._parser_params for more information.
    is_text : boolean, default True
        whether file/buffer is in text format (csv, json, etc.), or in binary
        mode (pickle, etc.)

    Returns
    -------
    f : file-like
        A file-like object
    handles : list of file-like objects
        A list of file-like object that were opened in this function.
    """
    try:
        from s3fs import S3File
        need_text_wrapping = (BytesIO, S3File)
    except ImportError:
        need_text_wrapping = (BytesIO,)

    handles = list()
    f = path_or_buf

    # Convert pathlib.Path/py.path.local or string
    path_or_buf = _stringify_path(path_or_buf)
    is_path = isinstance(path_or_buf, compat.string_types)

    if compression:

        if compat.PY2 and not is_path and encoding:
            msg = 'compression with encoding is not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            import gzip
            if is_path:
                f = gzip.open(path_or_buf, mode)
            else:
                f = gzip.GzipFile(fileobj=path_or_buf)

        # BZ Compression
        elif compression == 'bz2':
            import bz2
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
            elif compat.PY2:
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually
                f = StringIO(bz2.decompress(path_or_buf.read()))
                path_or_buf.close()
            else:
                f = bz2.BZ2File(path_or_buf)

        # ZIP Compression
        elif compression == 'zip':
            zf = BytesZipFile(path_or_buf, mode)
            if zf.mode == 'w':
                f = zf
            elif zf.mode == 'r':
                zip_names = zf.namelist()
                if len(zip_names) == 1:
                    f = zf.open(zip_names.pop())
                elif len(zip_names) == 0:
                    raise ValueError('Zero files found in ZIP file {}'
                                     .format(path_or_buf))
                else:
                    raise ValueError('Multiple files found in ZIP file.'
                                     ' Only one file per ZIP: {}'
                                     .format(zip_names))

        # XZ Compression
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path_or_buf, mode)

        # Unrecognized Compression
        else:
            msg = 'Unrecognized compression type: {}'.format(compression)
            raise ValueError(msg)

        handles.append(f)

    elif is_path:
        if compat.PY2:
            # Python 2
            f = open(path_or_buf, mode)
        elif encoding:
            # Python 3 and encoding
            f = open(path_or_buf, mode, encoding=encoding)
        elif is_text:
            # Python 3 and no explicit encoding
            f = open(path_or_buf, mode, errors='replace')
        else:
            # Python 3 and binary mode
            f = open(path_or_buf, mode)
        handles.append(f)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    if compat.PY3 and is_text and\
            (compression or isinstance(f, need_text_wrapping)):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding)
        handles.append(f)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f, handles
Пример #8
0
def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                memory_map=False):
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf :
        a path (str) or buffer
    mode : str
        mode to open path_or_buf with
    encoding : str or None
    compression : str or None
        Supported compression protocols are gzip, bz2, zip, and xz
    memory_map : boolean, default False
        See parsers._parser_params for more information.

    Returns
    -------
    f : file-like
        A file-like object
    handles : list of file-like objects
        A list of file-like object that were openned in this function.
    """

    handles = list()
    f = path_or_buf
    is_path = isinstance(path_or_buf, compat.string_types)

    if compression:

        if compat.PY2 and not is_path and encoding:
            msg = 'compression with encoding is not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            import gzip
            if is_path:
                f = gzip.open(path_or_buf, mode)
            else:
                f = gzip.GzipFile(fileobj=path_or_buf)

        # BZ Compression
        elif compression == 'bz2':
            import bz2
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
            elif compat.PY2:
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually
                f = StringIO(bz2.decompress(path_or_buf.read()))
                path_or_buf.close()
            else:
                f = bz2.BZ2File(path_or_buf)

        # ZIP Compression
        elif compression == 'zip':
            import zipfile
            zip_file = zipfile.ZipFile(path_or_buf)
            zip_names = zip_file.namelist()
            if len(zip_names) == 1:
                f = zip_file.open(zip_names.pop())
            elif len(zip_names) == 0:
                raise ValueError('Zero files found in ZIP file {}'
                                 .format(path_or_buf))
            else:
                raise ValueError('Multiple files found in ZIP file.'
                                 ' Only one file per ZIP: {}'
                                 .format(zip_names))

        # XZ Compression
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path_or_buf, mode)

        # Unrecognized Compression
        else:
            msg = 'Unrecognized compression type: {}'.format(compression)
            raise ValueError(msg)

        handles.append(f)

    elif is_path:
        if compat.PY2:
            # Python 2
            f = open(path_or_buf, mode)
        elif encoding:
            # Python 3 and encoding
            f = open(path_or_buf, mode, encoding=encoding)
        else:
            # Python 3 and no explicit encoding
            f = open(path_or_buf, mode, errors='replace')
        handles.append(f)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    if compat.PY3 and (compression or isinstance(f, need_text_wrapping)):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding)
        handles.append(f)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f, handles