Пример #1
0
def test_idzip():
    '''Round-trip a gzipped and a plain file through ``idzip_compression``.

    For each fixture the command is invoked with ``-`` as its path argument and
    the fixture's bytes supplied as stdin. The captured stdout is decompressed
    with ``_compression.GzipFile`` and compared with the fixture's uncompressed
    content. The gzipped fixture must produce the "Detected gzip input file"
    message on stderr; the plain fixture must not.
    '''
    runner = CliRunner(mix_stderr=False)

    # --- gzipped input ---------------------------------------------------
    path = datafile("20150710_3um_AGP_001_29_30.mzML.gz")
    # Read the fixture inside a context manager so the handle is released
    # immediately instead of lingering until garbage collection.
    with open(path, 'rb') as fixture:
        stdin_data = io.BytesIO(fixture.read())
    result = runner.invoke(
        indexing.idzip_compression,
        ['-'],
        input=stdin_data)
    assert b"Detected gzip input file" in result.stderr_bytes
    outbuff = io.BytesIO(result.stdout_bytes)
    outstream = _compression.GzipFile(fileobj=outbuff, mode='rb')
    # The expected payload is the *decompressed* content of the fixture.
    with _compression.GzipFile(path, mode='rb') as instream:
        in_data = instream.read()
    out_data = outstream.read()
    assert in_data == out_data

    # --- plain (uncompressed) input --------------------------------------
    path = datafile("small.mzML")
    with open(path, 'rb') as fixture:
        stdin_data = io.BytesIO(fixture.read())
    result = runner.invoke(
        indexing.idzip_compression,
        ['-'],
        input=stdin_data)
    assert b"Detected gzip input file" not in result.stderr_bytes
    outbuff = io.BytesIO(result.stdout_bytes)
    outstream = _compression.GzipFile(fileobj=outbuff, mode='rb')
    # The fixture is not compressed, so its raw bytes are the expected payload.
    with io.open(path, mode='rb') as instream:
        in_data = instream.read()
    out_data = outstream.read()
    assert in_data == out_data
Пример #2
0
def guess_type_from_file_sniffing(file_path: Union[os.PathLike, io.IOBase]):
    '''Choose a loader class by inspecting the first bytes of a file.

    Up to 1000 bytes are read from the start of the input. If those bytes
    begin with the gzip magic number (as judged by
    ``_compression.starts_with_gz_magic``), the input is read again through
    ``_compression.GzipFile`` and the first 1000 decompressed bytes are
    inspected instead.

    Parameters
    ----------
    file_path : :class:`os.PathLike` or :class:`io.IOBase`
        Either a path to open, or an already-open handle. The argument is
        treated as an open handle when ``is_random_access(file_path)`` is
        true; in that case it is read in place and rewound to offset 0
        afterwards.

    Returns
    -------
    type
        ``ProcessedMzMLLoader`` if the header contains ``b"mzML"``, otherwise
        ``ProcessedMGFLoader`` if it contains ``b"BEGIN IONS"``.

    Raises
    ------
    ValueError
        If neither marker is found in the header.
    '''
    is_random_access_file = is_random_access(file_path)
    if is_random_access_file:
        header = file_path.read(1000)
        file_path.seek(0)
    else:
        with open(file_path, 'rb') as handle:
            header = handle.read(1000)

    if _compression.starts_with_gz_magic(header):
        if is_random_access_file:
            gz_handle = _compression.GzipFile(fileobj=file_path, mode='rb')
            try:
                header = gz_handle.read(1000)
            finally:
                # Rewind the caller's stream itself rather than the temporary
                # decompression wrapper: whether seeking the wrapper also
                # resets the wrapped stream depends on the wrapper
                # implementation, and the caller needs its handle back at the
                # start regardless of how the read went.
                file_path.seek(0)
        else:
            with _compression.GzipFile(file_path, mode='rb') as handle:
                header = handle.read(1000)

    if b"mzML" in header:
        return ProcessedMzMLLoader
    elif b"BEGIN IONS" in header:
        return ProcessedMGFLoader
    else:
        raise ValueError("Cannot determine ScanLoader type from header")
Пример #3
0
def get_writer(filename, **kwargs):
    '''Build a serializer for `filename`, choosing the serializer class from
    the file extension.

    A trailing ``.gz`` (case-insensitive) is stripped before the extension is
    examined and causes the output to be wrapped in ``_compression.GzipFile``.
    The remaining extension, lower-cased and without its leading dot, is
    looked up in ``serializers``; a failed lookup propagates as raised.

    .. warning::
        If using a file-like object, do not use a compressed writer or else
        the stream will be doubly-compressed.

    Parameters
    ----------
    filename : :class:`str`, :class:`os.PathLike`, or file-like object
        The destination path, or a file-like object exposing a "name"
        attribute from which the format is inferred.
    **kwargs
        Keyword arguments passed through to the serializer's constructor

    Returns
    -------
    MzMLSerializer or MGFSerializer or MzMLbSerializer
    '''
    # Anything carrying a ``name`` attribute is treated as an open stream.
    stream = filename if hasattr(filename, 'name') else None
    if stream is not None:
        label = stream.name
    else:
        label = os.path.basename(filename)

    compressed = label.lower().endswith(".gz")
    if compressed:
        label = label[:-3]

    extension = os.path.splitext(label.lower())[1]
    serializer_cls = serializers[extension[1:]]

    # The mzMLb serializer receives the original argument untouched; no
    # handle is opened or wrapped on its behalf here.
    if serializer_cls == MzMLbSerializer:
        return serializer_cls(filename, **kwargs)

    if stream is None:
        # A path was given: open it ourselves, compressed or plain.
        if compressed:
            stream = _compression.GzipFile(filename, 'wb')
        else:
            stream = open(filename, 'wb')
    elif compressed:
        # A stream was given and its name ends in ".gz": layer compression
        # on top of it.
        stream = _compression.GzipFile(fileobj=filename, mode='wb')

    return serializer_cls(stream, **kwargs)
Пример #4
0
def test_mgf():
    '''Convert ``small.mzML`` to MGF on stdout, plain and then with ``-z``.

    Both runs must emit 34 lines containing "BEGIN", the ``-z`` output must
    start with the gzip magic number, and no ``-idx.json`` file may exist in
    the working directory before or after either run.
    '''
    orphan_index = "-idx.json"
    runner = CliRunner(mix_stderr=False)
    if os.path.exists(orphan_index):
        raise IOError("Orphan index file exists before running test")
    path = datafile("small.mzML")

    # Uncompressed run: count the marker lines in the text output.
    result = runner.invoke(conversion.mgf, [path, '-'], catch_exceptions=False)
    n_begin = sum(1 for text_line in result.output.splitlines() if "BEGIN" in text_line)
    assert n_begin == 34
    if os.path.exists(orphan_index):
        raise IOError("Orphan index file exists after running uncompressed test")

    # Compressed run: stdout should be a gzip stream holding the same records.
    result = runner.invoke(conversion.mgf, [path, '-z', '-'])
    assert _compression.starts_with_gz_magic(result.stdout_bytes)
    reader = _compression.GzipFile(fileobj=io.BytesIO(result.stdout_bytes), mode='rb')
    n_begin = sum(1 for raw_line in reader if b"BEGIN" in raw_line)
    assert n_begin == 34
    if os.path.exists(orphan_index):
        raise IOError("Orphan index file exists after running compressed test")
Пример #5
0
 def idzip_compression(path, output):
     # Copy the bytes read from ``path`` into ``output`` through a
     # ``_compression.GzipFile`` writer opened in 'wb' mode.
     #
     # path   -- input location handed to ``click.open_file`` in 'rb' mode.
     # output -- output location handed to ``click.open_file`` in 'wb' mode;
     #           ``None`` is replaced by '-'.
     #
     # NOTE(review): documented with comments rather than a docstring because
     # this looks like a click command (its decorators are outside this view)
     # and click would surface a docstring as the command's help text --
     # confirm before converting these notes into a docstring.
     if output is None:
         output = '-'
     with click.open_file(output, mode='wb') as outfh:
         writer = _compression.GzipFile(fileobj=outfh, mode='wb')
         try:
             with click.open_file(path, 'rb') as infh:
                 # Copy in chunks of 2 ** 28 bytes so the whole input is never
                 # held in memory at once.
                 buffer_size = 2 ** 28
                 chunk = infh.read(buffer_size)
                 while chunk:
                     writer.write(chunk)
                     chunk = infh.read(buffer_size)
         finally:
             # Close the writer even when the copy fails, and do it while
             # ``outfh`` is still open, so the writer is never left to be
             # finalized against an already-closed output handle.
             writer.close()
Пример #6
0
 def idzip_compression(path, output):
     '''Compress a file using  idzip, a gzip-compatible format with random access support.
     '''
     # The docstring above is left exactly as it was.
     # NOTE(review): this looks like a click command (decorators are outside
     # this view), in which case the docstring is user-facing help text --
     # confirm before rewording it.
     #
     # path   -- input location handed to ``click.open_file`` in 'rb' mode.
     # output -- output location handed to ``click.open_file`` in 'wb' mode;
     #           ``None`` is replaced by '-'.
     if output is None:
         output = '-'
     with click.open_file(output, mode='wb') as outfh:
         writer = _compression.GzipFile(fileobj=outfh, mode='wb')
         try:
             with click.open_file(path, 'rb') as infh:
                 try:
                     # Wrap the input so its first bytes can be peeked at
                     # without being consumed.
                     infh_wrap = io.BufferedReader(infh)
                     header = infh_wrap.peek(2)
                     if _compression.starts_with_gz_magic(header):
                         # Already-gzipped input is decompressed on the fly so
                         # the writer receives the uncompressed payload.
                         click.echo("Detected gzip input file", err=True)
                         infh_wrap = _compression.GzipFile(fileobj=infh_wrap)
                 except AttributeError:
                     # The sniffing above raised AttributeError; fall back to
                     # copying the handle as-is.
                     infh_wrap = infh
                 buffer_size = _compression.WRITE_BUFFER_SIZE
                 chunk = infh_wrap.read(buffer_size)
                 while chunk:
                     writer.write(chunk)
                     chunk = infh_wrap.read(buffer_size)
         finally:
             # Close the writer even when the copy fails, and do it while
             # ``outfh`` is still open, so the writer is never left to be
             # finalized against an already-closed output handle.
             writer.close()