Exemplo n.º 1
0
def test_quick_index():
    """Index the sample mzML file and check the MS1 and MSn scan counts."""
    loader = ms_deisotope.MSFileLoader(mzml_path)
    # quick_index.index returns a pair; only the scan index is inspected here.
    scan_index, _ = quick_index.index(loader)
    ms1_count, msn_count = len(scan_index.ms1_ids), len(scan_index.msn_ids)
    assert ms1_count == 14
    assert msn_count == 34
Exemplo n.º 2
0
def metadata_index(paths, processes=4):
    '''Build an external scan metadata index for each mass spectrometry data file

    The extended index is written to a separate JSON file that can be loaded
    with :class:`~.ExtendedScanIndex`. It records the scan time of every scan,
    the precursor mass of MSn scans, the relationships between precursor and
    product ion scans, and other details. See :class:`~.ExtendedScanIndex`
    for more information.

    Parameters
    ----------
    paths : iterable
        Paths of the data files to index; each is processed in turn.
    processes : int
        When greater than 1, indexing is delegated to
        :func:`quick_index.index` with this value. Otherwise the reader is
        iterated directly and every scan bunch is added to a fresh index.
    '''
    for path in paths:
        reader = MSFileLoader(path)
        # Best effort: readers lacking byte-offset support simply raise
        # AttributeError here, which is ignored.
        try:
            prebuild = reader.prebuild_byte_offset_file
            if not reader.source._check_has_byte_offset_file():
                prebuild(path)
        except AttributeError:
            pass

        use_workers = processes > 1
        if use_workers:
            index, _ = quick_index.index(reader, processes)
        else:
            index = quick_index.ExtendedScanIndex()
            reader.reset()
            for scan_bunch in reader:
                index.add_scan_bunch(scan_bunch)

        # The output location is derived from the data file's own path.
        destination = index.index_file_name(path)
        with open(destination, 'w') as out:
            index.serialize(out)
Exemplo n.º 3
0
def metadata_index(paths, processes=4, deconvoluted=False):
    '''Build an external scan metadata index for each mass spectrometry data file

    The extended index is written to a separate JSON file that can be loaded
    with :class:`~.ExtendedScanIndex`. It records the scan time of every scan,
    the precursor mass of MSn scans, the relationships between precursor and
    product ion scans, and other details. See :class:`~.ExtendedScanIndex`
    for more information.

    Parameters
    ----------
    paths : iterable
        Paths of the data files to index; each is processed in turn.
    processes : int
        When greater than 1, indexing is delegated to
        :func:`quick_index.index` with this value and a percentage progress
        bar. Otherwise the reader is iterated directly.
    deconvoluted : bool
        When true the file is opened with :class:`ProcessedMzMLDeserializer`
        (with ``use_extended_index=False``) instead of :func:`MSFileLoader`.
    '''
    for path in paths:
        click.echo("Indexing %s" % (path, ))
        reader = (
            ProcessedMzMLDeserializer(path, use_extended_index=False)
            if deconvoluted else MSFileLoader(path))

        # Best effort: readers lacking byte-offset support simply raise
        # AttributeError here, which is ignored.
        try:
            prebuild = reader.prebuild_byte_offset_file
            if not reader.source._check_has_byte_offset_file():
                prebuild(path)
        except AttributeError:
            pass

        if processes > 1:
            progbar = progress(label='Building Index', length=100)
            # One-element list so the callback can mutate the running total.
            reported = [0]

            def update_bar(x):
                '''Progress Bar update callback for :func:`~.quick_index.index`

                Converts the fraction ``x`` into whole percent and advances
                the bar by the amount not yet reported.
                '''
                step = int(x * 100) - reported[0]  # pylint: disable=cell-var-from-loop
                progbar.update(step)  # pylint: disable=cell-var-from-loop
                reported[0] += step  # pylint: disable=cell-var-from-loop

            with progbar:
                update_bar(0.0)
                index, _ = quick_index.index(
                    reader, processes, progress_indicator=update_bar)
        else:
            index = quick_index.ExtendedScanIndex()
            reader.reset()
            # Fall back to a spinner when a TypeError shows that no length
            # is available for a sized progress bar.
            try:
                progbar = progress(label='Building Index', length=len(reader))
            except TypeError:
                progbar = spinner(title="Building Index")
            with progbar:
                for scan_bunch in reader.make_iterator(grouped=True):
                    # Scans in this bunch: the precursor (if any) plus products.
                    n_scans = (scan_bunch.precursor is not None) + len(scan_bunch.products)
                    index.add_scan_bunch(scan_bunch)
                    progbar.update(n_scans)

        # The output location is derived from the data file's own path.
        destination = index.index_file_name(path)
        with open(destination, 'w') as out:
            index.serialize(out)
Exemplo n.º 4
0
def metadata_index(paths, processes=4):
    """Build and save a scan metadata index for each file in ``paths``.

    Each file is opened with ``ms_deisotope.MSFileLoader``, indexed with
    ``quick_index.index`` using ``processes``, and the resulting index is
    serialized to the file name given by ``index.index_file_name(path)``.
    """
    for path in paths:
        loader = ms_deisotope.MSFileLoader(path)
        # Best effort: readers lacking byte-offset support simply raise
        # AttributeError here, which is ignored.
        try:
            prebuild = loader.prebuild_byte_offset_file
            if not loader.source._check_has_byte_offset_file():
                prebuild(path)
        except AttributeError:
            pass
        # The second element of the returned pair is not needed here.
        index, _ = quick_index.index(loader, processes)
        destination = index.index_file_name(path)
        with open(destination, 'w') as out:
            index.serialize(out)
Exemplo n.º 5
0
def oxonium_signature(ms_file, g_score_threshold=0.05):
    """Print signature-ion scores for MSn scans passing a G-score threshold.

    For every MSn scan id in the reader's extended index, the scan's
    deconvoluted peak set is scored with ``gscore_scanner``. Scans whose
    score is at least ``g_score_threshold`` are evaluated against a fixed
    reference glycan composition, and a tab-separated line is echoed with the
    scan id, precursor neutral mass, precursor charge, G-score and signature
    score.

    If the reader reports no index file, one is built with
    ``quick_index.index`` and written to ``reader._index_file_name`` first.
    """
    reader = ProcessedMzMLDeserializer(ms_file)
    if not reader.has_index_file():
        click.secho("Building temporary index...", fg='yellow')
        raw_loader = ms_deisotope.MSFileLoader(ms_file)
        built_index, _ = quick_index.index(raw_loader)
        reader.extended_index = built_index
        with open(reader._index_file_name, 'w') as out:
            built_index.serialize(out)

    from glycan_profiling.tandem.glycan.scoring.signature_ion_scoring import SignatureIonScorer
    from glycan_profiling.tandem.oxonium_ions import gscore_scanner
    reference = glypy.GlycanComposition.parse("{Fuc:1; Hex:5; HexNAc:4; Neu5Ac:2}")
    for scan_id in reader.extended_index.msn_ids:
        scan = reader.get_scan_by_id(scan_id)
        gscore = gscore_scanner(scan.deconvoluted_peak_set)
        # Skip scans below the threshold.
        if gscore < g_score_threshold:
            continue
        match = SignatureIonScorer.evaluate(scan, reference)
        precursor = scan.precursor_information
        row = (scan_id, precursor.neutral_mass, precursor.charge, gscore, match.score)
        click.echo("%s\t%f\t%r\t%f\t%f" % row)
Exemplo n.º 6
0
def msfile_info(ms_file):
    """Print summary statistics about the scans in ``ms_file``.

    The file is opened with ``ProcessedMzMLDeserializer``. If the reader
    reports no index file, one is built with ``quick_index.index`` and written
    to ``reader._index_file_name``. The summary echoes the file's base name,
    the MS1 and MSn scan counts, the earliest and latest MSn scan times, the
    number of precursors per integer charge state, and the counts of MSn
    entries flagged ``'defaulted'`` or ``'orphan'``.

    Parameters
    ----------
    ms_file : str
        Path to the data file to summarise.
    """
    reader = ProcessedMzMLDeserializer(ms_file)
    if not reader.has_index_file():
        # The second element of the returned pair is not needed here.
        index, _ = quick_index.index(
            ms_deisotope.MSFileLoader(ms_file))
        reader.extended_index = index
        with open(reader._index_file_name, 'w') as handle:
            index.serialize(handle)
    click.echo("Name: %s" % (os.path.basename(ms_file), ))
    click.echo("MS1 Scans: %d" % (len(reader.extended_index.ms1_ids), ))
    click.echo("MSn Scans: %d" % (len(reader.extended_index.msn_ids), ))

    n_defaulted = 0
    n_orphan = 0

    charges = defaultdict(int)
    first_msn = float('inf')
    last_msn = 0
    for scan_info in reader.extended_index.msn_ids.values():
        # Missing flags count as False, i.e. contribute 0.
        n_defaulted += scan_info.get('defaulted', False)
        n_orphan += scan_info.get('orphan', False)
        charges[scan_info['charge']] += 1
        rt = scan_info['scan_time']
        if rt < first_msn:
            first_msn = rt
        if rt > last_msn:
            last_msn = rt

    click.echo("First MSn Scan: %0.2f Minutes" % (first_msn, ))
    click.echo("Last MSn Scan: %0.2f Minutes" % (last_msn, ))

    # Discard non-integer charge keys *before* sorting. Sorting the raw items
    # first raises TypeError on Python 3 as soon as a non-integer key has to
    # be compared against an integer one.
    integer_charges = sorted(
        (charge, count) for charge, count in charges.items()
        if isinstance(charge, int))
    for charge, count in integer_charges:
        click.echo("Precursors with Charge State %d: %d" % (charge, count))

    click.echo("Defaulted MSn Scans: %d" % (n_defaulted, ))
    click.echo("Orphan MSn Scans: %d" % (n_orphan, ))
Exemplo n.º 7
0
def oxonium_signature(ms_file, g_score_threshold=0.05):
    """Print signature-ion scores for MSn scans passing a G-score threshold.

    For every MSn scan id in the reader's extended index, the scan's
    deconvoluted peak set is scored with ``gscore_scanner``. Scans whose
    score is at least ``g_score_threshold`` are evaluated against a fixed
    reference glycan composition, and a tab-separated line is echoed with the
    scan id, precursor neutral mass, precursor charge, G-score and signature
    score.

    If the reader reports no index file, one is built with
    ``quick_index.index`` and written to ``reader._index_file_name`` first.
    """
    reader = ProcessedMzMLDeserializer(ms_file)
    if not reader.has_index_file():
        click.secho("Building temporary index...", fg='yellow')
        raw_loader = ms_deisotope.MSFileLoader(ms_file)
        built_index, _ = quick_index.index(raw_loader)
        reader.extended_index = built_index
        with open(reader._index_file_name, 'w') as out:
            built_index.serialize(out)

    from glycan_profiling.tandem.glycan.scoring.signature_ion_scoring import SignatureIonScorer
    from glycan_profiling.tandem.oxonium_ions import gscore_scanner
    reference = glypy.GlycanComposition.parse(
        "{Fuc:1; Hex:5; HexNAc:4; Neu5Ac:2}")
    for scan_id in reader.extended_index.msn_ids:
        scan = reader.get_scan_by_id(scan_id)
        gscore = gscore_scanner(scan.deconvoluted_peak_set)
        # Skip scans below the threshold.
        if gscore < g_score_threshold:
            continue
        match = SignatureIonScorer.evaluate(scan, reference)
        precursor = scan.precursor_information
        row = (scan_id, precursor.neutral_mass, precursor.charge,
               gscore, match.score)
        click.echo("%s\t%f\t%r\t%f\t%f" % row)
Exemplo n.º 8
0
def msfile_info(ms_file):
    """Print summary statistics about the scans in ``ms_file``.

    The file is opened with ``ProcessedMzMLDeserializer``. If the reader
    reports no index file, one is built with ``quick_index.index`` and written
    to ``reader._index_file_name``. The summary echoes the file's base name,
    the MS1 and MSn scan counts, the earliest and latest MSn scan times, the
    number of precursors per integer charge state, and the counts of MSn
    entries flagged ``'defaulted'`` or ``'orphan'``.

    Parameters
    ----------
    ms_file : str
        Path to the data file to summarise.
    """
    reader = ProcessedMzMLDeserializer(ms_file)
    if not reader.has_index_file():
        # The second element of the returned pair is not needed here.
        index, _ = quick_index.index(ms_deisotope.MSFileLoader(ms_file))
        reader.extended_index = index
        with open(reader._index_file_name, 'w') as handle:
            index.serialize(handle)
    click.echo("Name: %s" % (os.path.basename(ms_file),))
    click.echo("MS1 Scans: %d" % (len(reader.extended_index.ms1_ids),))
    click.echo("MSn Scans: %d" % (len(reader.extended_index.msn_ids),))

    n_defaulted = 0
    n_orphan = 0

    charges = defaultdict(int)
    first_msn = float('inf')
    last_msn = 0
    for scan_info in reader.extended_index.msn_ids.values():
        # Missing flags count as False, i.e. contribute 0.
        n_defaulted += scan_info.get('defaulted', False)
        n_orphan += scan_info.get('orphan', False)
        charges[scan_info['charge']] += 1
        rt = scan_info['scan_time']
        if rt < first_msn:
            first_msn = rt
        if rt > last_msn:
            last_msn = rt

    click.echo("First MSn Scan: %0.2f Minutes" % (first_msn,))
    click.echo("Last MSn Scan: %0.2f Minutes" % (last_msn,))

    # Discard non-integer charge keys *before* sorting. Sorting the raw items
    # first raises TypeError on Python 3 as soon as a non-integer key has to
    # be compared against an integer one.
    integer_charges = sorted(
        (charge, count) for charge, count in charges.items()
        if isinstance(charge, int))
    for charge, count in integer_charges:
        click.echo("Precursors with Charge State %d: %d" % (charge, count))

    click.echo("Defaulted MSn Scans: %d" % (n_defaulted,))
    click.echo("Orphan MSn Scans: %d" % (n_orphan,))
Exemplo n.º 9
0
    reader, lock = reader_index[path]
    values = request.values
    print(values)
    with lock:
        scan = reader.get_scan_by_id(scan_id)
        response = format_scan(scan, values)
    return response


if __name__ == "__main__":
    import sys
    # Each command-line argument is a data file path. Files are registered
    # under their position (as a string) in the argument list.
    for i, path in enumerate(sys.argv[1:]):
        print("Loading {0} with Key {1}".format(path, i))
        reader = MSFileLoader(path)
        index_path = ExtendedScanIndex.index_file_name(path)
        if os.path.exists(index_path):
            # Use a context manager so the index file handle is closed
            # deterministically rather than left to garbage collection.
            with open(index_path, 'rt') as fh:
                file_index = ExtendedScanIndex.load(fh)
        else:
            print("Indexing {0}".format(path))
            reader.reset()
            # The second element of the returned pair is not needed here.
            file_index, _ = quick_index.index(reader)
            reader.reset()
            with open(index_path, 'wt') as fh:
                file_index.dump(fh)
        print(file_index)
        metadata_index[path] = file_index
        # Pair each reader with its own re-entrant lock; the request handler
        # above acquires it before calling into the reader.
        reader_index[path] = reader, RLock()
        key_index[str(i)] = path

    app.run(threaded=True)