def __download_as_pandas(self, chunksize, sniff_ahead=2**20):
    """Yield the file behind the URL as a stream of parsed table chunks.

    The data is first copied to a temporary location (`self.url` is set to
    whatever `self.__copyfileobj` returns), its compression is detected from
    the leading bytes, the column delimiter is sniffed from the first
    `sniff_ahead` characters, and the file is then read with `read_csv` in
    chunks of `chunksize`. Every chunk is passed through
    `self.INPLACE_process` before being yielded.

    Raises `GeneFabFileException` if the file cannot be read, decoded,
    sniffed, or parsed as a table.
    """
    with self.__tempfile() as local_path:
        self.url = self.__copyfileobj(local_path)
        # Identify the compression by file signature, not by file name
        with open(local_path, mode="rb") as raw:
            signature = raw.read(3)
        if signature == b"\x1f\x8b\x08":
            import gzip
            compression, opener = "gzip", gzip.open
        elif signature == b"\x42\x5a\x68":
            import bz2
            compression, opener = "bz2", bz2.open
        else:
            compression, opener = "infer", open
        try:
            with opener(local_path, mode="rt", newline="") as text:
                sample = text.read(sniff_ahead)
                sep = Sniffer().sniff(sample).delimiter
            # dict() call (not a literal) so that clashing keys in
            # `self.pandas_kws` keep raising instead of silently overriding
            reader_options = dict(
                sep=sep,
                compression=compression,
                chunksize=chunksize,
                **self.pandas_kws,
            )
            chunks = read_csv(local_path, **reader_options)
            for index, csv_chunk in enumerate(chunks):
                self.INPLACE_process(csv_chunk)
                detail = f"interpreted table chunk {index}:\n {local_path}"
                GeneFabLogger.info(f"{self.name}; {detail}")
                yield csv_chunk
        except (IOError, UnicodeDecodeError, CSVError, PandasParserError):
            reason = "Not recognized as a table file"
            raise GeneFabFileException(reason, name=self.name, url=self.url)
def createSourcePackage(path):
    """Stream a `git archive` tarball of HEAD into the file at `path`.

    `git archive --format=tar` is run in the directory returned by
    `getRootDirectory()`, and its standard output is copied into the stream
    returned by `_open(filename=path, mode="wb")`.
    NOTE(review): `_open` is defined elsewhere in this file; the local name
    suggests a gzip writer -- confirm.

    Raises:
        Exception: if git exits with a non-zero status.
    """
    from shutil import copyfileobj

    gitArgs = [
        "git",
        "archive",
        "--format=tar",
        "--prefix=midisnoop-%s%s" % (VERSION, sep),
        "HEAD",
    ]
    gzipStream = _open(filename=path, mode="wb")
    try:
        # Run git in the repository root through `cwd=` rather than
        # chdir()-ing the whole interpreter and restoring it afterwards.
        process = Popen(
            gitArgs, stdout=PIPE, bufsize=-1, cwd=getRootDirectory()
        )
        processOut = process.stdout
        try:
            copyfileobj(processOut, gzipStream, 8192)
        finally:
            # Close our end of the pipe before waiting: this releases the
            # descriptor and, if copying failed midway, keeps git from
            # blocking forever on a pipe nobody is reading.
            processOut.close()
            result = process.wait()
        if result:
            raise Exception("git archive process failed")
    finally:
        gzipStream.close()
def open_compressed(filename, *args, _open=open, **kwargs):
    """Return seamlessly decompressed open file handle for `filename`.

    If `filename` is a text path (a `str`, or an `os.PathLike` such as
    `pathlib.Path` that resolves to a `str`), it is opened with the opener
    matching its suffix: `gzip.open`, `bz2.open` or `lzma.open` for the
    `Compression.GZIP`, `Compression.BZIP2` and `Compression.XZ` suffixes,
    otherwise `_open`. Extra positional and keyword arguments are forwarded
    to the chosen opener.

    Anything else (for example an already open file object) is returned
    unchanged.
    """
    from os import PathLike, fspath

    # Path-like objects name a file just like strings do; previously they
    # were mistaken for open handles and handed back unopened.
    path = fspath(filename) if isinstance(filename, PathLike) else filename
    if not isinstance(path, str):
        # Not a text path: assume it is already a file and pass it through
        return filename
    # Compression modules are imported lazily, only when actually needed
    if path.endswith(Compression.GZIP):
        from gzip import open as _open
    elif path.endswith(Compression.BZIP2):
        from bz2 import open as _open
    elif path.endswith(Compression.XZ):
        from lzma import open as _open
    return _open(path, *args, **kwargs)
def open_compressed(filename, *args, _open=open, **kwargs):
    """Return seamlessly decompressed open file handle for `filename`.

    If `filename` is a text path (a `str`, or an `os.PathLike` such as
    `pathlib.Path` that resolves to a `str`), it is opened with the opener
    matching its suffix: `gzip.open`, `bz2.open` or `lzma.open` for the
    `Compression.GZIP`, `Compression.BZIP2` and `Compression.XZ` suffixes,
    otherwise `_open`. Extra positional and keyword arguments are forwarded
    to the chosen opener.

    Anything else (for example an already open file object) is returned
    unchanged.
    """
    from os import PathLike, fspath

    # Path-like objects name a file just like strings do; previously they
    # were mistaken for open handles and handed back unopened.
    path = fspath(filename) if isinstance(filename, PathLike) else filename
    if not isinstance(path, str):
        # Not a text path: assume it is already a file and pass it through
        return filename
    # Compression modules are imported lazily, only when actually needed
    if path.endswith(Compression.GZIP):
        from gzip import open as _open
    elif path.endswith(Compression.BZIP2):
        from bz2 import open as _open
    elif path.endswith(Compression.XZ):
        from lzma import open as _open
    return _open(path, *args, **kwargs)
def createSourcePackage(path):
    """Stream a `git archive` tarball of HEAD into the file at `path`.

    `git archive --format=tar` is run in the directory returned by
    `getRootDirectory()`, and its standard output is copied into the stream
    returned by `_open(filename=path, mode="wb")`.
    NOTE(review): `_open` is defined elsewhere in this file; the local name
    suggests a gzip writer -- confirm.

    Raises:
        Exception: if git exits with a non-zero status.
    """
    from shutil import copyfileobj

    gitArgs = [
        "git",
        "archive",
        "--format=tar",
        "--prefix=midisnoop-%s%s" % (VERSION, sep),
        "HEAD",
    ]
    gzipStream = _open(filename=path, mode="wb")
    try:
        # Run git in the repository root through `cwd=` rather than
        # chdir()-ing the whole interpreter and restoring it afterwards.
        process = Popen(
            gitArgs, stdout=PIPE, bufsize=-1, cwd=getRootDirectory()
        )
        processOut = process.stdout
        try:
            copyfileobj(processOut, gzipStream, 8192)
        finally:
            # Close our end of the pipe before waiting: this releases the
            # descriptor and, if copying failed midway, keeps git from
            # blocking forever on a pipe nobody is reading.
            processOut.close()
            result = process.wait()
        if result:
            raise Exception("git archive process failed")
    finally:
        gzipStream.close()