Exemplo n.º 1
0
 def __download_as_pandas(self, chunksize, sniff_ahead=2**20):
     """Download data from URL and yield it as parsed table chunks.

     A temporary file is obtained from `self.__tempfile()` and handed to
     `self.__copyfileobj` (presumably this writes the downloaded data into
     it -- confirm against that helper); the helper's return value replaces
     `self.url`.  The first three bytes of the file are compared with the
     gzip and bzip2 signatures to choose how to open it, the delimiter is
     sniffed from the first `sniff_ahead` characters of the text, and the
     file is then read with `read_csv` in chunks.

     Because this is a generator and the `yield` sits inside the
     `with self.__tempfile()` block, that context stays open until the
     generator is exhausted or closed.

     Parameters:
         chunksize: forwarded to `read_csv` as `chunksize`.
         sniff_ahead: number of characters read from the start of the
             text and passed to `Sniffer().sniff` for delimiter detection.

     Yields:
         Each chunk produced by `read_csv`, after it has been passed to
         `self.INPLACE_process`.

     Raises:
         GeneFabFileException: when IOError, UnicodeDecodeError, CSVError,
             or PandasParserError is raised while sniffing or parsing; the
             original error is attached as the exception's cause.
     """
     with self.__tempfile() as tempfile:
         self.url = self.__copyfileobj(tempfile)
         # Peek at the leading bytes to recognize compressed payloads.
         with open(tempfile, mode="rb") as handle:
             magic = handle.read(3)
         if magic == b"\x1f\x8b\x08":  # gzip signature + deflate method
             compression = "gzip"
             from gzip import open as _open
         elif magic == b"\x42\x5a\x68":  # b"BZh", bzip2 signature
             compression = "bz2"
             from bz2 import open as _open
         else:
             compression, _open = "infer", open
         try:
             # Sniff the delimiter from the (decompressed) text first ...
             with _open(tempfile, mode="rt", newline="") as handle:
                 sep = Sniffer().sniff(handle.read(sniff_ahead)).delimiter
             # ... then let pandas re-read the same file chunk by chunk.
             _reader_kw = dict(
                 sep=sep, compression=compression,
                 chunksize=chunksize, **self.pandas_kws,
             )
             for i, csv_chunk in enumerate(read_csv(tempfile, **_reader_kw)):
                 self.INPLACE_process(csv_chunk)
                 msg = f"interpreted table chunk {i}:\n  {tempfile}"
                 GeneFabLogger.info(f"{self.name}; {msg}")
                 yield csv_chunk
         except (IOError, UnicodeDecodeError, CSVError, PandasParserError) as exc:
             msg = "Not recognized as a table file"
             # Chain explicitly so the underlying parse/IO error is kept as
             # the direct cause of the reported exception.
             raise GeneFabFileException(
                 msg, name=self.name, url=self.url,
             ) from exc
Exemplo n.º 2
0
def createSourcePackage(path):
    """Stream the output of `git archive` for HEAD into the file at `path`.

    The output file is opened with `_open(filename=path, mode="wb")`
    (presumably `gzip.open`, given the stream's name -- confirm at the
    import site).  `git archive --format=tar` is run from the directory
    returned by `getRootDirectory()`, with every archive entry prefixed by
    "midisnoop-<VERSION><sep>".  The process working directory is changed
    for the duration of the call and restored afterwards.

    Parameters:
        path: destination file name passed to `_open`.

    Raises:
        Exception: if the `git archive` process exits with a non-zero
            status.
    """
    gzipStream = _open(filename=path, mode="wb")
    try:
        gitArgs = [
            "git", "archive", "--format=tar",
            "--prefix=midisnoop-%s%s" % (VERSION, sep), "HEAD"
        ]
        oldDirectory = getcwd()
        chdir(getRootDirectory())
        try:
            process = Popen(gitArgs, stdout=PIPE, bufsize=-1)
            processOut = process.stdout
            try:
                # Copy the tar stream in fixed-size pieces until EOF.
                while True:
                    data = processOut.read(8192)
                    if not data:
                        break
                    gzipStream.write(data)
            finally:
                # Close our end of the pipe before waiting: if the copy
                # loop failed part-way, the child could otherwise block
                # forever writing to a pipe nobody reads, and wait() would
                # never return.  This also releases the descriptor
                # deterministically on the success path.
                processOut.close()
                result = process.wait()
            if result:
                raise Exception("git archive process failed")
        finally:
            chdir(oldDirectory)
    finally:
        gzipStream.close()
Exemplo n.º 3
0
def open_compressed(filename, *args, _open=open, **kwargs):
    """Open `filename`, picking a decompressing opener from its suffix.

    Anything that is not a `str` is assumed to be an already opened file
    and is returned unchanged.  For a `str`, the suffix is compared with
    `Compression.GZIP`, `Compression.BZIP2` and `Compression.XZ` (in that
    order) to select `gzip.open`, `bz2.open` or `lzma.open`; when none
    matches, the `_open` argument is used.  `*args` and `**kwargs` are
    forwarded to the chosen opener.
    """
    # Not a path string: hand the object straight back to the caller.
    if not isinstance(filename, str):
        return filename

    if filename.endswith(Compression.GZIP):
        import gzip
        _open = gzip.open
    elif filename.endswith(Compression.BZIP2):
        import bz2
        _open = bz2.open
    elif filename.endswith(Compression.XZ):
        import lzma
        _open = lzma.open

    return _open(filename, *args, **kwargs)
Exemplo n.º 4
0
def open_compressed(filename, *args, _open=open, **kwargs):
    """Return an open handle for `filename`, decompressing by suffix.

    A `str` argument is opened: with `gzip.open`, `bz2.open` or
    `lzma.open` when it ends with `Compression.GZIP`, `Compression.BZIP2`
    or `Compression.XZ` respectively (checked in that order), otherwise
    with `_open`.  Extra positional and keyword arguments go to the
    opener.  A non-`str` argument is treated as an existing file object
    and returned as is.
    """
    is_path = isinstance(filename, str)
    if is_path:
        # Start from the caller-supplied opener; override on suffix match.
        opener = _open
        if filename.endswith(Compression.GZIP):
            from gzip import open as opener
        elif filename.endswith(Compression.BZIP2):
            from bz2 import open as opener
        elif filename.endswith(Compression.XZ):
            from lzma import open as opener
        handle = opener(filename, *args, **kwargs)
    else:
        # Already a file: pass it through untouched.
        handle = filename
    return handle
Exemplo n.º 5
0
def createSourcePackage(path):
    """Write the `git archive` tar stream of HEAD to the file at `path`.

    The destination is opened via `_open(filename=path, mode="wb")`
    (presumably `gzip.open`, judging by the stream's name -- confirm at
    the import site).  `git archive --format=tar` runs with the working
    directory set to `getRootDirectory()`, and archive entries are
    prefixed with "midisnoop-<VERSION><sep>".  The previous working
    directory is restored before returning, including on error.

    Parameters:
        path: destination file name passed to `_open`.

    Raises:
        Exception: if the `git archive` process returns a non-zero exit
            status.
    """
    gzipStream = _open(filename=path, mode="wb")
    try:
        gitArgs = ["git", "archive", "--format=tar",
                   "--prefix=midisnoop-%s%s" % (VERSION, sep), "HEAD"]
        oldDirectory = getcwd()
        chdir(getRootDirectory())
        try:
            process = Popen(gitArgs, stdout=PIPE, bufsize=-1)
            processOut = process.stdout
            try:
                # Relay the child's output in 8 KiB reads until EOF.
                while True:
                    data = processOut.read(8192)
                    if not data:
                        break
                    gzipStream.write(data)
            finally:
                # Close the read end first so that, if the relay loop
                # failed, the child is not left blocked on a full pipe
                # (which would make wait() hang); it also frees the pipe
                # descriptor promptly on success.
                processOut.close()
                result = process.wait()
            if result:
                raise Exception("git archive process failed")
        finally:
            chdir(oldDirectory)
    finally:
        gzipStream.close()