Пример #1
0
def test_deconvolute_spectrum(species, ratio):
    """Check that `deconvolute_spectrum` recovers the mixing ratios of a synthetic scan.

    A scan is assembled as the `ratio`-weighted sum of the noise-free profiles of
    every entry in `species`, deconvoluted, and the recovered ratios are compared
    (to two decimals) against `ratio` normalized to its largest value.

    `species` and `ratio` are presumably supplied by a pytest parametrization
    that is not visible here.
    """
    # Ideal isotopic pattern of every species
    patterns = [calculate_abundance(formula) for formula in species]

    # Shared m/z grid, padded 5% beyond the outermost isotopic peaks
    lowest_mz = np.min([np.min(p["mz"]) for p in patterns])
    highest_mz = np.max([np.max(p["mz"]) for p in patterns])
    x = np.linspace(lowest_mz * 0.95, highest_mz * 1.05, num=2000)

    profiles = [
        gauss_conv(x, p["intensity"], p["mz"], noise=False) for p in patterns
    ]

    # Weighted sum of the individual profiles is the "measured" scan
    mixture = np.zeros_like(x)
    for idx, weight in enumerate(ratio):
        mixture += profiles[idx] * weight
    scan = {"mz": x, "intensity": mixture}

    calc_ratio, species_spectra = deconvolute_spectrum(scan, species)

    # Check Ratios
    expected_ratio = ratio / np.max(ratio)
    print(expected_ratio, calc_ratio)
    npt.assert_almost_equal(expected_ratio, calc_ratio, decimal=2)

    # Check reconstructed spectra
    y_final = np.zeros_like(x)
    for spectrum in species_spectra:
        y_final += gauss_conv(x,
                              spectrum["intensity"],
                              spectrum["mz"],
                              noise=False)
    # NOTE(review): err_norm is computed but not asserted in the visible code.
    err_norm = np.linalg.norm(scan["intensity"] - y_final)
Пример #2
0
def test_find_ms_peaks(mol_formula, ans_peaks):
    """Check that `find_ms_peaks` locates the peaks of a noise-free synthetic profile.

    The profile is generated from the isotopic abundance of `mol_formula`; the
    m/z positions of the detected peaks must lie within an L2 distance of 0.1
    from `ans_peaks`.
    """
    abundance = calculate_abundance(mol_formula)
    mz, intensity = abundance["mz"], abundance["intensity"]

    # Grid padded 5% beyond the outermost isotopic peaks
    grid = np.linspace(np.min(mz) * 0.95, np.max(mz) * 1.05, num=1000)
    profile = gauss_conv(grid, intensity, mz, noise=False)

    spectrum = {"mz": grid, "intensity": profile}
    peaks = find_ms_peaks(spectrum, prominence=0.05)

    deviation = np.linalg.norm(peaks["mz"] - ans_peaks)
    npt.assert_equal(deviation < 0.1, True)
def test_abundance(mol_formula, ans):
    """Check the structure and values returned by `calculate_abundance`.

    The result must expose "mz" and "intensity", match `ans` to two decimals,
    and contain only intensities above the tolerance: 1e-4 for the default
    call, and the explicitly passed `tol` for the second call.
    """
    data = calculate_abundance(mol_formula)
    fields = ("mz", "intensity")

    # Make sure our output data structure has all the parts we want
    for key in fields:
        npt.assert_equal(True, key in data.keys())

    # Make sure the MZ and Intensity are correct
    for key in fields:
        npt.assert_equal(np.round(ans[key], decimals=2),
                         np.round(data[key], decimals=2))

    # Every reported intensity must exceed the 1e-4 cutoff
    above_cutoff = np.argwhere(data["intensity"] > 1e-4)
    npt.assert_equal(data["intensity"].size, above_cutoff.size)

    # Same check with an explicitly chosen tolerance
    new_tol = 1e-5
    data2 = calculate_abundance(mol_formula, tol=new_tol)
    above_new_tol = np.argwhere(data2["intensity"] > new_tol)
    npt.assert_equal(data2["intensity"].size, above_new_tol.size)
Пример #4
0
# Here are some examples ranging from no overlap in the isotopic envelope to a
# lot of overlap. Each line pairs a list of species with the weights used to
# mix their spectra. Uncomment the example you want to try below.
#
species, ratio = (["InCl", "InCl2", "InC2H6"], [2, 35, 6])
# species, ratio = (["MoF2Cl2", "MoO6", "MoO2Cl2"], [1.5, 2.5, 4.7])
# species, ratio = (["MoF2Cl2", "MoF2HCl2", "MoO4Cl"], [29, 3100, 30])
# species, ratio = (["MoF2HCl2", "MoO4Cl"], [3100, 31])
# species, ratio = (["CO", "CN"], [17, 41])
# species, ratio = (["CO", "CN", "B2H", "O2"], [20, 40, 37, 12])
# species, ratio = (["FeS", "MnO2", "VF2", "GaNH2"], [45, 63, 21, 17])

#
# Set up data and deconvolute
#

# Isotopic abundance of every species in the mixture
data = [calculate_abundance(sp) for sp in species]
# m/z grid: 5% below the smallest and 2.5% above the largest isotopic m/z
xmin = np.min([np.min(d["mz"]) for d in data]) * 0.95
xmax = np.max([np.max(d["mz"]) for d in data]) * 1.025
x = np.linspace(xmin, xmax, num=2000)
print(xmin, xmax)
# Noise-free profile of each species on the shared grid
y = [gauss_conv(x, d["intensity"], d["mz"], noise=False) for d in data]

# Synthetic scan: sum of the individual profiles, each weighted by its ratio
scan = {}
scan["mz"] = x
scan["intensity"] = np.zeros_like(x)
for i, r in enumerate(ratio):
    scan["intensity"] += y[i] * r

# Recover the ratios from the synthetic scan; return_embedded=True asks for the
# scaled embedded spectra instead of the per-species isotopic spectra
calc_ratio, species_spectra = deconvolute_spectrum(scan,
                                                   species,
                                                   return_embedded=True)
Пример #5
0
def deconvolute_spectrum(data_dict: dict,
                         species: list,
                         decimals: int = 0,
                         return_embedded: bool = False) -> (np.ndarray, list):
    """Disentangle the spectrum in `data_dict`, given that the chemical species in
    the list `species` are the only systems present in the current mass window.

    The peaks found in the spectrum are matched, after rounding, against the
    isotopic m/z values of every species. The population of each species is then
    obtained from a non-negative least-squares fit of the matched peak
    intensities.

    Parameters
    ----------
    data_dict : dict
        The spectrum, where data_dict["mz"] is an array of the mass/charge ratios
        and data_dict["intensity"] is an array of the intensities.
    species : list
        A list of strings where each element is the chemical formula for a species
        present in the mass window. E.g.  ["AlF3", "AlOF2"].
    decimals : int, optional
        Number of decimals that every m/z value (ideal and measured) is rounded
        to before the peaks are matched, by default 0.
    return_embedded : bool, optional
        Return the scaled embedded spectra, mostly so you can stack the bar plots,
        by default `False`.

    Returns
    -------
    `np.ndarray`
        The population ratios for each species, normalized to the largest value.
    list
        A list of dictionaries where each element is a spectrum, where data["mz"] is
        an array of the mass/charge ratios and data["intensity"] is an array of the
        intensities, scaled by the fitted (absolute) coefficient of its species.

    Raises
    ------
    ValueError
        If `species` is empty.
    AssertionError
        If the fit assigns a coefficient of exactly 0 to any species.
    """
    if len(species) == 0:
        raise ValueError("`species` must contain at least one chemical formula")

    # Round all m/z as soon as we see them
    species_spectra = [calculate_abundance(sp) for sp in species]
    for sp in species_spectra:
        sp["mz"] = np.round(sp["mz"], decimals=decimals)

    # Union of the rounded m/z values over all species
    unique_mz = np.unique(np.concatenate([sp["mz"] for sp in species_spectra]))

    print("Unique MZ")
    print(unique_mz)

    # Get peaks from spectra that are in our predicted abundance
    b_data = find_ms_peaks(data_dict, prominence=0.05)
    b_data["mz"] = np.round(b_data["mz"], decimals=decimals)
    print(b_data["mz"])
    matched = np.isin(b_data["mz"], unique_mz)
    mz_b = b_data["mz"][matched]
    b = b_data["intensity"][matched]

    # Embed isotopic spectra in full set spectrum of mz
    e_spectra = [embed_spectrum(sp, mz_b) for sp in species_spectra]

    print("\nPeaks from given spectra")
    print(mz_b)

    # One column per species, one row per matched peak
    A = np.zeros((b.shape[0], len(species)))
    for i, sp in enumerate(e_spectra):
        A[:, i] = sp["intensity"]

    # Solve linear problem using non-negative least squares
    print("\nCoefficient Matrix A:")
    print(A)
    cond_number = np.linalg.cond(A)
    print("\nCondition Number of A {:.3f}".format(cond_number))
    if cond_number > 1000:
        # NOTE(review): the message text looks truncated; kept unchanged because
        # the intended wording is not visible here.
        warnings.warn("The coefficient matrix is ill-conditioned!!"
                      "Check the ")
    x, R = nnls(A, b, maxiter=1000)

    # Normalized to the largest value. Guard against an all-zero solution so the
    # division does not produce NaNs before the zero-coefficient check below.
    largest = x.max()
    x_norm = x / largest if largest > 0 else np.zeros_like(x)
    print("\nRatio of species\n================")
    for i, sp in enumerate(species):
        print("Spcecies: {:18s}  Relative ratio: {:.3f}  Abs. ratio {:.3f}".
              format(sp, x_norm[i], x[i]))
    print("\nError residual from fitting {:.4f}".format(R))

    # If any of the coefficients are 0, there are two possibilities
    # 1) That species wasn't in the mass window
    # 2) No peaks from that species were found in the spectrum
    # Only the first offending species is reported before raising.
    zero_idx = np.flatnonzero(x == 0)
    if zero_idx.size > 0:
        missing = species[zero_idx[0]]
        print("\nERROR: Species {} has a linear coefficient of 0".format(
            missing))
        print("This could mean one of two things:")
        print("1) That species wasn't in the mass window. Or")
        print("2) No peaks from that species were found in the spectrum\n")
        raise AssertionError(
            "Species {} has a linear coefficient of 0".format(missing))

    # Scale abundances so their sum matches the realistic spectra
    if return_embedded:  # return scaled embedded spectra if requested
        species_spectra = e_spectra
    for i, sp in enumerate(species_spectra):
        sp["intensity"] *= x[i]
    return x_norm, species_spectra
Пример #6
0
"""
This example shows how to calculate and plot the stick spectrum for the
isotopic abundance of a molecule.

Author: James E. T. Smith <*****@*****.**>
Date: 1/20/20
"""

import matplotlib.pyplot as plt
import seaborn as sns

from msanalysis.data_processing import calculate_abundance

#
# Isotopic abundance pattern for a molecular formula
#
data = calculate_abundance("Sn2C5H15")
mz_values = data["mz"]
intensities = data["intensity"]

#
# Draw the stick spectrum as a bar plot and write it to disk
#

plt.figure()
sns.set_style("darkgrid")
plt.bar(mz_values, intensities, width=0.2)
plt.xlabel("M/Z")
plt.ylabel("Relative Fraction of Intensity")
plt.savefig("figures/ex6.png", dpi=600)
Пример #7
0
from msanalysis.data_processing import filter_spectrum
from msanalysis.data_processing import calculate_abundance


def gauss_conv(x, A, x0, sigma=0.1, noise=True, noise_level=0.05):
    """Build a synthetic profile as a sum of Gaussians, optionally with noise.

    Parameters
    ----------
    x : array_like
        Grid on which the profile is evaluated.
    A : iterable of float
        Amplitude of each Gaussian.
    x0 : iterable of float
        Center of each Gaussian, paired element-wise with `A`.
    sigma : float, optional
        Standard deviation shared by all Gaussians, by default 0.1.
    noise : bool, optional
        Whether to add uniform random noise to the profile, by default True.
    noise_level : float, optional
        Upper bound of the uniform noise drawn from [0, noise_level),
        by default 0.05.

    Returns
    -------
    np.ndarray
        Floating-point array with the same shape as `x`.
    """
    x = np.asarray(x)
    # Integer grids (e.g. np.arange) would give an integer accumulator that
    # cannot take the in-place float additions below.
    if not np.issubdtype(x.dtype, np.inexact):
        x = x.astype(float)

    y = np.zeros_like(x)
    for A_i, x0_i in zip(A, x0):
        y += A_i * np.exp(-0.5 * ((x - x0_i) / sigma)**2)
    if noise:
        y += np.random.rand(*x.shape) * noise_level
    return y


#
# Create spectra from isotopic abundance
#
data = calculate_abundance("AlCl2Br")
# Evaluate the synthetic profile on 1000 points spanning m/z 150 to 200
x = np.linspace(150, 200, num=1000)
y = gauss_conv(x, data["intensity"], data["mz"])
# Pack the profile into the {"mz", "intensity"} dict passed to filter_spectrum
scan0 = {}
scan0["mz"] = x
scan0["intensity"] = y

#
# Filter the spectrum
#
new_scan0 = filter_spectrum(scan0)

#
# Plot
#
plt.figure()