Пример #1
0
def test_save_as_mgf_single_spectrum():
    """Write one spectrum to an .mgf file and check selected output lines."""
    peak_mz = numpy.array([100, 200, 300], dtype="float")
    peak_intensities = numpy.array([10, 10, 500], dtype="float")
    metadata = {
        "charge": -1,
        "inchi": '"InChI=1S/C6H12"',
        "pepmass": (100, 10.0),
        "test_field": "test",
    }
    builder = SpectrumBuilder().with_mz(peak_mz).with_intensities(peak_intensities)
    spectrum = builder.with_metadata(metadata,
                                     metadata_harmonization=False).build()

    with tempfile.TemporaryDirectory() as workdir:
        target = os.path.join(workdir, "test.mgf")
        save_as_mgf(spectrum, target)

        # The export must have produced a file.
        assert os.path.isfile(target)

        # Read the export back line by line (newlines are kept).
        with open(target, "r", encoding="utf-8") as handle:
            lines = list(handle)

        # Lines that must match exactly, keyed by their position in the file.
        expected_lines = {
            0: "BEGIN IONS\n",
            2: "CHARGE=1-\n",
            4: "TEST_FIELD=test\n",
        }
        for position, expected in expected_lines.items():
            assert lines[position] == expected

        # Line 7 holds the last peak; its first space-separated token is the m/z.
        last_peak_mz = lines[7].split(" ")[0]
        assert last_peak_mz == "300.0"
Пример #2
0
def test_save_load_mgf_consistency(tmpdir, charge, ionmode, parent_mass):
    """Round-trip two spectra through an .mgf file and check the first one read back."""
    metadata = {
        "precursor_mz": 200.5,
        "charge": charge,
        "ionmode": ionmode,
        "parent_mass": parent_mass
    }
    peak_mz = numpy.array([100.1, 200.02, 300.003], dtype="float")
    peak_intensities = numpy.array([0.01, 0.02, 1.0], dtype="float")
    builder = SpectrumBuilder().with_mz(peak_mz).with_intensities(peak_intensities)

    # Build two spectra from the same builder and the same metadata.
    spectra = [
        builder.with_metadata(metadata, metadata_harmonization=True).build()
        for _ in range(2)
    ]

    # Export both spectra into the pytest-provided temporary directory.
    filename = os.path.join(tmpdir, "test.mgf")
    save_as_mgf(spectra, filename)
    assert os.path.isfile(filename)

    # Load everything back and inspect the first imported spectrum.
    first_import = list(load_from_mgf(filename))[0]
    expected_metadata = {
        "precursor_mz": 200.5,
        "charge": charge,
        "ionmode": ionmode,
        # The test expects parent_mass to come back in its string form.
        "parent_mass": str(parent_mass),
    }
    for key, expected in expected_metadata.items():
        assert first_import.get(key) == expected
Пример #3
0
def test_save_as_mgf_spectrum_list():
    """Write two spectra to one .mgf file and check the layout of the output."""
    builder = SpectrumBuilder().with_mz(
        numpy.array([100, 200, 300], dtype="float")).with_intensities(
            numpy.array([10, 10, 500], dtype="float"))

    # Same peaks for both spectra; only the "test_field" value differs.
    spectra = [
        builder.with_metadata({"test_field": label},
                              metadata_harmonization=False).build()
        for label in ("test1", "test2")
    ]

    with tempfile.TemporaryDirectory() as workdir:
        target = os.path.join(workdir, "test.mgf")
        save_as_mgf(spectra, target)

        # The export must have produced a file.
        assert os.path.isfile(target)

        with open(target, "r", encoding="utf-8") as handle:
            lines = list(handle)

        # Both spectrum records are expected to close at these positions.
        first_end, second_end = lines[5], lines[12]
        assert first_end == "END IONS\n" and second_end == "END IONS\n"

        # The metadata line of each record carries that record's own value.
        for position, expected_value in ((1, "test1\n"), (8, "test2\n")):
            assert lines[position].split("=")[1] == expected_value
Пример #4
0
def test_save_as_mgf_spectrum_list():
    """Test saving a list of spectra to an .mgf file.

    Two spectra with identical peaks but different ``test_field`` metadata are
    written to a single file. The test checks that the file exists, that both
    records end with ``END IONS`` at the expected positions, and that each
    record carries its own ``test_field`` value.
    """
    spectrum1 = Spectrum(mz=numpy.array([100, 200, 300], dtype="float"),
                         intensities=numpy.array([10, 10, 500], dtype="float"),
                         metadata={"test_field": "test1"})

    spectrum2 = Spectrum(mz=numpy.array([100, 200, 300], dtype="float"),
                         intensities=numpy.array([10, 10, 500], dtype="float"),
                         metadata={"test_field": "test2"})
    # Write to test file
    with tempfile.TemporaryDirectory() as d:
        filename = os.path.join(d, "test.mgf")
        save_as_mgf([spectrum1, spectrum2], filename)

        # test if file exists
        assert os.path.isfile(filename)

        # Test if content of mgf file is correct.
        # Read with an explicit encoding so the check does not depend on the
        # platform's default locale encoding.
        with open(filename, "r", encoding="utf-8") as f:
            mgf_content = f.readlines()
        assert mgf_content[5] == mgf_content[12] == "END IONS\n"
        assert mgf_content[1].split("=")[1] == "test1\n"
        assert mgf_content[8].split("=")[1] == "test2\n"
Пример #5
0
def main(argv):
    """Filter a mass spectra file and write the filtered spectra to a new file.

    Command-line options select the input file, its format (``msp`` or
    ``mgf``), the output file and which filters to apply. The enabled filters
    run on every spectrum in a fixed order: intensity normalisation, default
    filters, metadata cleaning, relative-intensity selection, m/z selection.
    ``msp`` input is written with ``save_as_msp``; ``mgf`` input is written
    with ``save_as_mgf``.

    Args:
        argv: Command-line arguments supplied by the caller.
            NOTE(review): this value is not forwarded to the parser;
            ``parse_args()`` is called without arguments and therefore reads
            ``sys.argv``. Left unchanged so existing callers are unaffected.

    Returns:
        int: ``0`` after the filtered spectra have been saved.

    Raises:
        ValueError: If no filter flag was given, or if ``--spectra_format``
            is neither ``msp`` nor ``mgf``.
    """
    parser = argparse.ArgumentParser(
        description="Compute MSP similarity scores")
    parser.add_argument("--spectra",
                        type=str,
                        required=True,
                        help="Mass spectra file to be filtered.")
    parser.add_argument("--spectra_format",
                        type=str,
                        required=True,
                        help="Format of spectra file.")
    parser.add_argument("--output",
                        type=str,
                        required=True,
                        help="Filtered mass spectra file.")
    parser.add_argument(
        "-normalise_intensities",
        action='store_true',
        help="Normalize intensities of peaks (and losses) to unit height.")
    parser.add_argument(
        "-default_filters",
        action='store_true',
        help=
        "Collection of filters that are considered default and that do no require any (factory) arguments."
    )
    parser.add_argument(
        "-clean_metadata",
        action='store_true',
        help=
        "Apply all adding and cleaning filters if possible, so that the spectra have canonical metadata."
    )
    parser.add_argument(
        "-relative_intensity",
        action='store_true',
        help=
        "Keep only peaks within set relative intensity range (keep if to_intensity >= intensity >= from_intensity)."
    )
    parser.add_argument("--from_intensity",
                        type=float,
                        help="Lower bound for intensity filter")
    parser.add_argument("--to_intensity",
                        type=float,
                        help="Upper bound for intensity filter")
    parser.add_argument(
        "-mz_range",
        action='store_true',
        help=
        "Keep only peaks between set m/z range (keep if to_mz >= m/z >= from_mz)."
    )
    parser.add_argument("--from_mz",
                        type=float,
                        help="Lower bound for m/z  filter")
    parser.add_argument("--to_mz",
                        type=float,
                        help="Upper bound for m/z  filter")
    args = parser.parse_args()

    # At least one filter flag must be set, otherwise there is nothing to do.
    if not (args.normalise_intensities or args.default_filters or
            args.clean_metadata or args.relative_intensity or args.mz_range):
        raise ValueError('No filter selected.')

    if args.spectra_format == 'msp':
        spectra = list(load_from_msp(args.spectra))
    # Bug fix: this branch used to read ``args.queries_format``, an attribute
    # the parser never defines, so every non-msp input (including valid mgf
    # files) crashed with AttributeError instead of being loaded or rejected.
    elif args.spectra_format == 'mgf':
        spectra = list(load_from_mgf(args.spectra))
    else:
        raise ValueError(
            f'File format {args.spectra_format} not supported for mass spectra file.'
        )

    # The metadata filter chain does not depend on the spectrum, so build it
    # once; the names are only looked up when the flag is actually set.
    if args.clean_metadata:
        metadata_filters = [
            add_compound_name, add_precursor_mz, add_fingerprint,
            add_losses, add_parent_mass, add_retention_index,
            add_retention_time, clean_compound_name
        ]
    else:
        metadata_filters = []

    filtered_spectra = []
    for spectrum in spectra:
        if args.normalise_intensities:
            spectrum = normalize_intensities(spectrum)

        if args.default_filters:
            spectrum = default_filters(spectrum)

        # Each metadata filter receives the result of the previous one.
        for metadata_filter in metadata_filters:
            spectrum = metadata_filter(spectrum)

        if args.relative_intensity:
            spectrum = select_by_relative_intensity(spectrum,
                                                    args.from_intensity,
                                                    args.to_intensity)

        if args.mz_range:
            spectrum = select_by_mz(spectrum, args.from_mz, args.to_mz)

        filtered_spectra.append(spectrum)

    # Only 'msp' and 'mgf' reach this point, so the else branch is mgf output.
    if args.spectra_format == 'msp':
        save_as_msp(filtered_spectra, args.output)
    else:
        save_as_mgf(filtered_spectra, args.output)

    return 0