def save(self):
    """
    Build the csv writer for the configured target and write the data.

    The target ``self.path_or_buf`` is handled in one of three ways:

    * zip output (an existing ``ZipFile`` instance, or a target without a
      ``write`` attribute combined with ``compression == 'zip'``): rows are
      collected in an in-memory ``StringIO`` and pushed to the archive once
      writing has finished,
    * any other object exposing ``write``: written to directly and left open,
    * anything else: opened through ``_get_handle`` and closed on exit.

    A ``RuntimeWarning`` is emitted when ``compression`` is set while the
    target exposes ``write`` (GH21227).
    """
    has_write = hasattr(self.path_or_buf, 'write')

    # GH21227 internal compression is not used when file-like passed.
    if self.compression and has_write:
        warnings.warn("compression has no effect when passing file-like "
                      "object as input.", RuntimeWarning, stacklevel=2)

    # when zip compression is called.
    zip_by_option = (not has_write) and self.compression == 'zip'
    is_zip = isinstance(self.path_or_buf, ZipFile) or zip_by_option

    # True only when this method opened the handle and must close it again.
    owns_handle = False
    if is_zip:
        # zipfile doesn't support writing string to archive: receive the csv
        # output in a string buffer and dump it into the zip handle later.
        # GH21241, GH21118
        f = StringIO()
    elif has_write:
        f = self.path_or_buf
    else:
        f, handles = _get_handle(self.path_or_buf, self.mode,
                                 encoding=self.encoding,
                                 compression=self.compression)
        owns_handle = True

    try:
        writer_kwargs = {
            'lineterminator': self.line_terminator,
            'delimiter': self.sep,
            'quoting': self.quoting,
            'doublequote': self.doublequote,
            'escapechar': self.escapechar,
            'quotechar': self.quotechar,
        }
        if self.encoding != 'ascii':
            writer_kwargs['encoding'] = self.encoding
            self.writer = UnicodeWriter(f, **writer_kwargs)
        else:
            self.writer = csvlib.writer(f, **writer_kwargs)

        self._save()

    finally:
        if is_zip:
            # GH17778 handles zip compression separately.
            buf = f.getvalue()
            if hasattr(self.path_or_buf, 'write'):
                self.path_or_buf.write(buf)
            else:
                f, handles = _get_handle(self.path_or_buf, self.mode,
                                         encoding=self.encoding,
                                         compression=self.compression)
                f.write(buf)
                owns_handle = True
        if owns_handle:
            f.close()
            # ``handles`` is bound on every path that sets ``owns_handle``.
            for _fh in handles:
                _fh.close()
def save(self):
    """
    Create the csv writer and write the data to ``self.path_or_buf``.

    When ``self.encoding`` is None, 'ascii' is used on Python 2 and 'utf-8'
    otherwise.

    Output routing:

    * target is not file-like (treated as a path), or is file-like and has
      a ``name`` attribute: rows are collected in an in-memory ``StringIO``
      and then dumped through ``_get_handle`` opened on the path / name, so
      that ``self.compression`` is applied (PR 21300, GH 21241, GH 21118),
    * any other file-like object: rows are written to it directly and it is
      left open.

    The dump happens in a ``finally`` clause, so whatever was buffered
    before an error in ``_save`` is still written out.
    """
    if self.encoding is not None:
        encoding = self.encoding
    else:
        encoding = 'ascii' if compat.PY2 else 'utf-8'

    # PR 21300 uses string buffer to receive csv writing and dump into
    # file-like output with compression as option. GH 21241, 21118
    f = StringIO()
    if not is_file_like(self.path_or_buf):
        # path_or_buf is path
        path_or_buf = self.path_or_buf
    elif hasattr(self.path_or_buf, 'name'):
        # path_or_buf is file handle
        # NOTE(review): the handle is re-opened by name, so its current
        # position/mode are not used - confirm this is intended.
        path_or_buf = self.path_or_buf.name
    else:
        # path_or_buf is file-like IO objects.
        f = self.path_or_buf
        path_or_buf = None

    try:
        writer_kwargs = dict(lineterminator=self.line_terminator,
                             delimiter=self.sep, quoting=self.quoting,
                             doublequote=self.doublequote,
                             escapechar=self.escapechar,
                             quotechar=self.quotechar)
        if encoding == 'ascii':
            self.writer = csvlib.writer(f, **writer_kwargs)
        else:
            writer_kwargs['encoding'] = encoding
            self.writer = UnicodeWriter(f, **writer_kwargs)

        self._save()

    finally:
        # GH 17778 handles zip compression for byte strings separately.
        # Only the intermediate StringIO has to be drained; a caller-supplied
        # file-like object already received the rows and may not implement
        # getvalue() at all, so it must not be queried here.
        if path_or_buf:
            buf = f.getvalue()
            f, handles = _get_handle(path_or_buf, self.mode,
                                     encoding=encoding,
                                     compression=self.compression)
            f.write(buf)
            f.close()
            for _fh in handles:
                _fh.close()
def _appendCurrentFigure(document, caption, width):
    """Add *caption* as a 'List Number' paragraph, then the current pyplot
    figure as a picture of the given *width*, and close the figure."""
    # Local import: rendered image data is binary, so a bytes buffer is
    # required (a text StringIO rejects bytes on Python 3).
    from io import BytesIO

    memfile = BytesIO()
    try:
        plt.savefig(memfile)
        # Rewind so the picture is read from the start of the buffer.
        memfile.seek(0)
        document.add_paragraph(caption, style='List Number')
        document.add_picture(memfile, width=width)
    finally:
        memfile.close()
        # Release the figure so repeated calls do not accumulate figures.
        plt.close()


def createDocForOneMetric(dir, label_mode):
    """
    Build a .docx report for the classification results found under *dir*.

    Every file below *dir* accepted by ``isCorrectFileName`` contributes one
    normalized confusion matrix. After all files are processed, a summary
    quality plot over all collected ``k`` values is appended and the
    document is saved as ``'report_' + <metrics> + '.docx'`` relative to the
    current working directory, where ``<metrics>`` is the value returned by
    ``extraktLastDirFromPath(dir)``.

    Parameters
    ----------
    dir : str
        Directory that is walked (recursively, via ``os.walk``) for result
        files.
    label_mode : str
        ``"single"`` or ``"multi"``; selects which summary plot function is
        called. For any other value no summary plot function is called and
        whatever pyplot considers the current figure is saved instead.
    """
    metrics = extraktLastDirFromPath(dir)
    document = Document()
    header = document.add_heading('Report: ', 1)
    # Title the report once; doing this inside the walk loop repeated the
    # text for every sub-directory visited.
    header.add_run(metrics + ' mertics').bold = True

    listOfTrueLabels = []
    listOfPredictedLabels = []
    listOfK = []

    for root, _subDirs, fileNames in os.walk(dir):
        for fileName in fileNames:
            if not isCorrectFileName(fileName):
                continue

            k = extraktNumberOfK(fileName)
            # Join against the directory that actually contains the file and
            # let os.path pick the separator (no hard-coded backslash).
            dataPath = os.path.join(os.path.abspath(root), fileName)
            trueLabels, predictedLabels = extractClassificationCases(dataPath)

            plot_confusion_matrix(
                trueLabels, predictedLabels,
                classes=unique_labels(trueLabels, predictedLabels),
                normalize=True,
                title='Normalized confusion matrix for ' + str(k) + ' k')
            _appendCurrentFigure(
                document, 'Confusion matrix for ' + str(k) + ' k.', Inches(5))

            listOfTrueLabels.append(trueLabels)
            listOfPredictedLabels.append(predictedLabels)
            listOfK.append(k)

    if label_mode == "single":
        plotClassificationQualityForSingleLabel(
            listOfTrueLabels, listOfPredictedLabels, listOfK)
    elif label_mode == "multi":
        plotClassificationQualityForMultiLabels(
            listOfTrueLabels, listOfPredictedLabels, listOfK)

    _appendCurrentFigure(
        document, 'Classification quality char for all k.', Inches(6))
    document.save('report_' + metrics + '.docx')
def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                memory_map=False, is_text=True):
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf :
        a path (str) or buffer
    mode : str
        mode to open path_or_buf with
    encoding : str or None
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default None
        If 'infer' and `path_or_buf` is path-like, then detect
        compression from the following extensions: '.gz', '.bz2', '.zip',
        or '.xz' (otherwise no compression).
    memory_map : boolean, default False
        See parsers._parser_params for more information.
    is_text : boolean, default True
        whether file/buffer is in text format (csv, json, etc.), or in binary
        mode (pickle, etc.)

    Returns
    -------
    f : file-like
        A file-like object
    handles : list of file-like objects
        A list of file-like object that were opened in this function.

    Raises
    ------
    ValueError
        If `compression` is not recognized, if a ZIP archive opened for
        reading does not contain exactly one file, or (Python 2 only) if
        compression and encoding are both given for a non-path buffer.
    """
    try:
        from s3fs import S3File
        need_text_wrapping = (BytesIO, S3File)
    except ImportError:
        need_text_wrapping = (BytesIO,)

    handles = []
    f = path_or_buf

    # Convert pathlib.Path/py.path.local or string
    path_or_buf = _stringify_path(path_or_buf)
    is_path = isinstance(path_or_buf, compat.string_types)

    if is_path:
        compression = _infer_compression(path_or_buf, compression)

    if compression:

        if compat.PY2 and not is_path and encoding:
            msg = 'compression with encoding is not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            import gzip
            if is_path:
                f = gzip.open(path_or_buf, mode)
            else:
                f = gzip.GzipFile(fileobj=path_or_buf)

        # BZ Compression
        elif compression == 'bz2':
            import bz2
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
            elif compat.PY2:
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually
                f = StringIO(bz2.decompress(path_or_buf.read()))
                path_or_buf.close()
            else:
                f = bz2.BZ2File(path_or_buf)

        # ZIP Compression
        elif compression == 'zip':
            zf = BytesZipFile(path_or_buf, mode)
            # Ensure the container is closed as well.
            handles.append(zf)
            if zf.mode == 'w':
                f = zf
            elif zf.mode == 'r':
                zip_names = zf.namelist()
                if len(zip_names) == 1:
                    f = zf.open(zip_names.pop())
                else:
                    # ``handles`` never reaches the caller when we raise, so
                    # the archive has to be closed here to avoid leaking it.
                    zf.close()
                    if not zip_names:
                        raise ValueError('Zero files found in ZIP file {}'
                                         .format(path_or_buf))
                    raise ValueError('Multiple files found in ZIP file.'
                                     ' Only one file per ZIP: {}'
                                     .format(zip_names))

        # XZ Compression
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path_or_buf, mode)

        # Unrecognized Compression
        else:
            msg = 'Unrecognized compression type: {}'.format(compression)
            raise ValueError(msg)

        handles.append(f)

    elif is_path:
        if compat.PY2:
            # Python 2
            mode = "wb" if mode == "w" else mode
            f = open(path_or_buf, mode)
        elif encoding:
            # Python 3 and encoding
            f = open(path_or_buf, mode, encoding=encoding, newline="")
        elif is_text:
            # Python 3 and no explicit encoding
            f = open(path_or_buf, mode, errors='replace', newline="")
        else:
            # Python 3 and binary mode
            f = open(path_or_buf, mode)
        handles.append(f)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    if (compat.PY3 and is_text and
            (compression or isinstance(f, need_text_wrapping))):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding, newline='')
        handles.append(f)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f, handles
def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                memory_map=False, is_text=True):
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf :
        a path (str) or buffer
    mode : str
        mode to open path_or_buf with
    encoding : str or None
    compression : str or None
        Supported compression protocols are gzip, bz2, zip, and xz
    memory_map : boolean, default False
        See parsers._parser_params for more information.
    is_text : boolean, default True
        whether file/buffer is in text format (csv, json, etc.), or in binary
        mode (pickle, etc.)

    Returns
    -------
    f : file-like
        A file-like object
    handles : list of file-like objects
        A list of file-like object that were opened in this function.

    Raises
    ------
    ValueError
        If `compression` is not recognized, if a ZIP archive opened for
        reading does not contain exactly one file, or (Python 2 only) if
        compression and encoding are both given for a non-path buffer.
    """
    try:
        from s3fs import S3File
        need_text_wrapping = (BytesIO, S3File)
    except ImportError:
        need_text_wrapping = (BytesIO,)

    handles = []
    f = path_or_buf

    # Convert pathlib.Path/py.path.local or string
    path_or_buf = _stringify_path(path_or_buf)
    is_path = isinstance(path_or_buf, compat.string_types)

    if compression:

        if compat.PY2 and not is_path and encoding:
            msg = 'compression with encoding is not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            import gzip
            if is_path:
                f = gzip.open(path_or_buf, mode)
            else:
                f = gzip.GzipFile(fileobj=path_or_buf)

        # BZ Compression
        elif compression == 'bz2':
            import bz2
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
            elif compat.PY2:
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually
                f = StringIO(bz2.decompress(path_or_buf.read()))
                path_or_buf.close()
            else:
                f = bz2.BZ2File(path_or_buf)

        # ZIP Compression
        elif compression == 'zip':
            zf = BytesZipFile(path_or_buf, mode)
            # The archive container is opened here, so it is reported to the
            # caller for closing; otherwise only the member stream would be
            # closed and the container left open.
            handles.append(zf)
            if zf.mode == 'w':
                f = zf
            elif zf.mode == 'r':
                zip_names = zf.namelist()
                if len(zip_names) == 1:
                    f = zf.open(zip_names.pop())
                else:
                    # ``handles`` never reaches the caller when we raise, so
                    # the archive has to be closed here to avoid leaking it.
                    zf.close()
                    if not zip_names:
                        raise ValueError('Zero files found in ZIP file {}'
                                         .format(path_or_buf))
                    raise ValueError('Multiple files found in ZIP file.'
                                     ' Only one file per ZIP: {}'
                                     .format(zip_names))

        # XZ Compression
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path_or_buf, mode)

        # Unrecognized Compression
        else:
            msg = 'Unrecognized compression type: {}'.format(compression)
            raise ValueError(msg)

        handles.append(f)

    elif is_path:
        if compat.PY2:
            # Python 2
            f = open(path_or_buf, mode)
        elif encoding:
            # Python 3 and encoding
            f = open(path_or_buf, mode, encoding=encoding)
        elif is_text:
            # Python 3 and no explicit encoding
            f = open(path_or_buf, mode, errors='replace')
        else:
            # Python 3 and binary mode
            f = open(path_or_buf, mode)
        handles.append(f)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    if (compat.PY3 and is_text and
            (compression or isinstance(f, need_text_wrapping))):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding)
        handles.append(f)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f, handles
def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                memory_map=False):
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf :
        a path (str) or buffer
    mode : str
        mode to open path_or_buf with
    encoding : str or None
    compression : str or None
        Supported compression protocols are gzip, bz2, zip, and xz
    memory_map : boolean, default False
        See parsers._parser_params for more information.

    Returns
    -------
    f : file-like
        A file-like object
    handles : list of file-like objects
        A list of file-like object that were opened in this function.

    Raises
    ------
    ValueError
        If `compression` is not recognized, if a ZIP archive does not
        contain exactly one file, or (Python 2 only) if compression and
        encoding are both given for a non-path buffer.
    """
    handles = []
    f = path_or_buf
    is_path = isinstance(path_or_buf, compat.string_types)

    if compression:

        if compat.PY2 and not is_path and encoding:
            msg = 'compression with encoding is not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            import gzip
            if is_path:
                f = gzip.open(path_or_buf, mode)
            else:
                f = gzip.GzipFile(fileobj=path_or_buf)

        # BZ Compression
        elif compression == 'bz2':
            import bz2
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
            elif compat.PY2:
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually
                f = StringIO(bz2.decompress(path_or_buf.read()))
                path_or_buf.close()
            else:
                f = bz2.BZ2File(path_or_buf)

        # ZIP Compression
        elif compression == 'zip':
            import zipfile
            zip_file = zipfile.ZipFile(path_or_buf)
            zip_names = zip_file.namelist()
            if len(zip_names) != 1:
                # Nothing is returned on this path, so close the archive
                # here instead of leaking it.
                zip_file.close()
                if not zip_names:
                    raise ValueError('Zero files found in ZIP file {}'
                                     .format(path_or_buf))
                raise ValueError('Multiple files found in ZIP file.'
                                 ' Only one file per ZIP: {}'
                                 .format(zip_names))
            # The archive container is opened here, so report it to the
            # caller for closing alongside the member stream.
            handles.append(zip_file)
            f = zip_file.open(zip_names.pop())

        # XZ Compression
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path_or_buf, mode)

        # Unrecognized Compression
        else:
            msg = 'Unrecognized compression type: {}'.format(compression)
            raise ValueError(msg)

        handles.append(f)

    elif is_path:
        if compat.PY2:
            # Python 2
            f = open(path_or_buf, mode)
        elif encoding:
            # Python 3 and encoding
            f = open(path_or_buf, mode, encoding=encoding)
        else:
            # Python 3 and no explicit encoding
            f = open(path_or_buf, mode, errors='replace')
        handles.append(f)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    # (``need_text_wrapping`` is not defined in this function; it is
    # presumably a module-level tuple of types - verify in the module.)
    if compat.PY3 and (compression or isinstance(f, need_text_wrapping)):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding)
        handles.append(f)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f, handles