High resolution mass spectrometry

Behind the decimal point

Given our ability to create high-precision m/z values by time-averaged centroiding, we should now be able to derive statistics and deduce the elemental composition of the different ions in our data.

from kendrick import read_mzml, histogram, get_time_averaged_centroids
# Absolute path to the example mzML file on the author's machine.
mzml_file = '/home/frank/Work/DATA/kendrick-data/Ref0443_casein_asap01.mzML' # TODO: create download function 
# read_mzml returns two objects; presumably the positive- and negative-mode
# data frames, judging by the names — TODO confirm against kendrick.read_mzml.
df_pos, df_min = read_mzml(mzml_file)

# Only the first frame is processed further: histogram it, then reduce the
# histogram to time-averaged centroids.
mz_hist = histogram(df_pos)
mz_centroids = get_time_averaged_centroids(mz_hist)
# Transposing and unpacking splits the two centroid columns into separate
# arrays (m/z positions and their heights, as used by the plot that follows).
mz_x, mz_y = mz_centroids.T

Let’s start by plotting the centroids as a stick spectrum.

import matplotlib.pyplot as plt 
import numpy as np
# Draw one vertical line per centroid, from zero up to its mz_y value.
fig, ax = plt.subplots()
ax.vlines(mz_x, ymin=np.zeros_like(mz_x), ymax=mz_y)
# argsort is ascending, so reversing it gives row indices ordered by
# descending mz_y.
idxs = np.argsort(mz_y)[::-1]
# Reorder the centroid rows accordingly and display the sorted table.
mz_sorted = mz_centroids[idxs]
mz_sorted
array([[2.57247150e+02, 4.94319338e+09],
       [2.29216050e+02, 1.46894206e+09],
       [2.55233050e+02, 1.17589672e+09],
       ...,
       [1.60306550e+02, 4.08524079e+03],
       [1.48418350e+02, 4.08524079e+03],
       [1.31345550e+02, 4.08524079e+03]])

For now let’s pick the entry at index 10 of the intensity-sorted centroids, i.e. the 11th most intense m/z value…

# Row index 10 is the 11th row of mz_sorted (indexing is 0-based); column 0
# of a row holds its m/z value.
mz10 = mz_sorted[10][0]
mz10
197.12845

Can we use molmass or pyopenms to deduce the elemental composition?

import molmass as mm

This is the latest version

# Show which molmass version is installed.
mm.__version__
'2023.8.30'
# Build a molmass Formula for CH2 and tabulate its spectrum
# (one row per mass number in the output below).
f = mm.Formula('CH2')
f.spectrum().dataframe()
Relative mass Fraction Intensity % m/z
Mass number
14 14.015650 9.890725e-01 1.000000e+02 14.015650
15 15.019066 1.092505e-02 1.104575e+00 15.019066
16 16.025297 2.473800e-06 2.501132e-04 16.025297
17 17.031558 1.415075e-10 1.430709e-08 17.031558

Let’s generate a bunch of elemental compositions…

import numpy as np
# Enumerate every combination of 0/1 counts for three elements: np.indices
# builds the 2x2x2 index grid, and transpose + reshape flattens it into one
# row of three counts per combination.
positions = np.indices([2, 2, 2]).T.reshape(-1, 3)
positions
array([[0, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [1, 1, 0],
       [0, 0, 1],
       [1, 0, 1],
       [0, 1, 1],
       [1, 1, 1]])
def formula(pos, elements=('C', 'H', 'O')):
    """Return the formula string for one row of per-element atom counts.

    Parameters
    ----------
    pos : sequence of int
        Atom count for each entry of `elements`, in the same order.
    elements : sequence of str, optional
        Element symbols. Defaults to carbon, hydrogen, oxygen.

    Returns
    -------
    str
        Concatenated symbols. A count of 0 omits the element, a count of 1
        writes the bare symbol, and any other count appends the number
        (e.g. ``(2, 1, 0)`` gives ``'C2H'``). All-zero counts give ``''``.

    Raises
    ------
    ValueError
        If `pos` and `elements` differ in length.
    """
    if len(pos) != len(elements):
        raise ValueError(
            f'expected {len(elements)} counts, got {len(pos)}'
        )
    parts = []
    for elem, n in zip(elements, pos):
        if n == 0:
            continue
        parts.append(elem if n == 1 else f'{elem}{n}')
    return ''.join(parts)


elements = ['C', 'H', 'O']
# Build the list through the helper instead of a loop variable named
# `formula`, which would overwrite the function with a string.
formula_list = [formula(p, elements) for p in positions]
formula_list
['', 'C', 'H', 'CH', 'O', 'CO', 'HO', 'CHO']
# String repetition writes each atom out individually: zero 'C' followed by
# two 'H'.
'C' * 0 + 'H' * 2
'HH'
# NOTE(review): the original line was garbled (not valid Python); it has been
# reconstructed from the two outputs displayed after it — confirm.
# Split a formula-like string into characters and count each distinct symbol.
unique, counts = np.unique(np.array(list('CHH')), return_counts=True)
# zip over the (unique, counts) pair wraps each array in a 1-tuple; use
# zip(unique, counts) instead if (symbol, count) pairs are wanted.
list(zip((unique, counts)))
[(array(['C', 'H'], dtype='<U1'),), (array([1, 2]),)]
unique, counts
(array(['C', 'H'], dtype='<U1'), array([1, 2]))