First load some libraries
from __future__ import print_function, division
import array
import os.path as op
import numpy as np
import matplotlib as mpl
import matplotlib.pylab as plt
%matplotlib inline
locate the data-set which should be in the repository
ls files/FTICR-Files/
so now we can read it
BASE = 'files/FTICR-Files/ESI_pos_Ubiquitin_000006.d'
F = open( op.join(BASE, 'fid'), 'rb')
tbuf = F.read()
fid = np.array(array.array('i',tbuf))
F.close()
len(fid)
fid[1000:1010]
plt.plot(fid)
small complication
There are 2 fft packages available
numpy.fft
scipy.fftpack
we are going to use numpy.fft
sp = np.fft.fft(fid)
print (sp[0])
len(sp)
# default size of the figures
mpl.rcParams['figure.figsize'] = [10.0,8.0]
# this helps matplotlib to draw the huge vectors we're using (4-8Mpoints)
mpl.rcParams['agg.path.chunksize'] = 100000
plt.plot(sp)
3 remarks
1 syntax remark
from numpy.fft import fft, rfft
sp = rfft(fid)
print (sp[0])
len(sp)
plt.plot(abs(sp)) # absolute value is modulus
# let's zoom in
plt.plot(abs(sp[715000:725000]))
# without the abs() we see the phase rotating
plt.plot(sp[720000:722000])
plt.figure()
# we rotate by 90° the phase
plt.plot(1j*sp[720000:722000])
recovery of the full resolution by zerofilling
### apodisation
plt.plot(np.hamming(100))
there is also blackman
hanning
bartlett
...
sp2 = rfft( np.hamming(len(fid))*fid )
plt.plot(abs(sp2))
plt.figure()
plt.plot(abs(sp2[720000:722000]))
Notice how lineshape is improved, and the isotopic pattern intensities restored
We can superimpose both processing, zooming on a line you can see that FWHM is sligthly worse when apodized.
Theory is 1.5 for hamming window, but does not really show here.
plt.plot(abs(sp)/max(sp))
plt.plot(abs(sp2)/max(sp2))
plt.xlim(721050,721080)
zerofilling consists in adding zero at the end of the FID to interpolate points in the spectrum and smooth the result
( and an example of slighlty different numpy arithmetics)
n = len(fid)
fidzf = np.zeros(2*n)
fidzf[:n] = fid[:]
fidzf[:n] *= np.hamming(n)
spzf = abs(rfft( fidzf ))
plt.plot(abs(spzf[2*720000:2*722000]))
lineshape and isotopic pattern intensities further improved !
plt.subplot(211)
plt.plot(abs(sp2[721100:721200]), label='direct')
plt.legend()
plt.subplot(212)
plt.plot(abs(spzf[2*721100:2*721200]), label='with zerofilling')
plt.legend()
In FT-ICR, the frequency $f$ depends on the magnetic field $B_o$ the charge $z$ and the mass $m$ : $$ f = \frac{B_o z}{m} \quad \Rightarrow \quad m/z = \frac{B_o}{f}$$ We are going to use a sligthly extended equation which takes cares of experimental imperfections: $$ m/z = \frac{M_1}{M_2+f}$$ where $M_1$ and $M_2$ are constants determined precisely by a calibration procedure
FT-ICR parameters are stored in the .method files
ls 'files/FTICR-Files/ESI_pos_Ubiquitin_000006.d/ESI_pos_150_3000.m/apexAcquisition.method'
parameter are stored in a XML file, we can look through the file and get the parameters manually, but it is also possible to simply build a dictionary from the entries.
Here a simple mini parser, using standard python library
from xml.dom import minidom
def read_param(filename):
"""
Open the given file and retrieve all parameters from apexAcquisition.method
NC is written when no value for value is found
structure : <param name = "AMS_ActiveExclusion"><value>0</value></param>
read_param returns values in a dictionnary
"""
xmldoc = minidom.parse(filename)
x = xmldoc.documentElement
pp = {}
children = x.childNodes
for child in children:
if (child.nodeName == 'paramlist'):
params = child.childNodes
for param in params:
if (param.nodeName == 'param'):
k = str(param.getAttribute('name'))
for element in param.childNodes:
if element.nodeName == "value":
try:
v = str(element.firstChild.toxml())
#print v
except:
v = "NC"
pp[k] = v
return pp
pfile = op.join(BASE, 'ESI_pos_150_3000.m', 'apexAcquisition.method')
param = read_param(pfile)
print (param['TD'])
# print the 10 first entries
print(list(param.keys())[:10])
# we find the parameters in the parameter file
sw = float(param['SW_h_Broadband'])
ml1 = float(param['ML1'])
ml2 = float(param['ML2'])
print('Spectral width: {}'.format(sw) )
print('constant M_1: {}'.format(ml1))
print('constant M_2: {}'.format(ml2))
faxis = np.linspace(0, sw, len(spzf)) # the freq axis from 0 to sw
mzaxis = ml1/(ml2+faxis) # and the mzaxis
plt.plot(mzaxis, spzf)
plt.xlim(856.5, 858.5)
plt.xlabel("$m/z$");
we can zoom on the monoisotopic peak, and try to determine precisely its value.
We know the theorical mass is 856.969496 (for the $z=10$ state).
plt.plot(mzaxis, spzf)
plt.xlim(856.9, 857.0)
plt.ylim(0,6E7)
plt.xlabel("$m/z$");
# these functions convert back and forth from index to m/z
def itomz(val, N):
"""transform index to m/z for N points,
using current ml1 ml2 and sw
"""
f = sw*val/N
return ml1/(ml2+f)
def mztoi(m, N):
"""transform m/z to index for N points,
using current ml1 ml2 and sw
"""
f = ml1/m - ml2
return N*f/sw
theo = 856.9694962104804
def ppm(theorical, measured):
return 1E6*(measured-theorical)/measured
we can compute the vector coordinates of the previous zoom window
start = int( mztoi(857.0, len(spzf)))
end = int( mztoi(856.9, len(spzf)))
print("start: {} - end: {}".format(start,end))
plt.plot(spzf[start:end])
top = spzf[start:end].argmax() + start
meas1 = itomz(top,len(spzf))
print("maximum is at: {}, for m/z: {}".format(top,meas1))
print("For an error of {:.3f} ppm".format(ppm(theo, meas1)))
# peak barycenter
bary = 0.0
s = 0
for i in range(-3, +4):
bary += i*spzf[i+top]
s += spzf[i+top]
mbary = itomz(bary/s+top, len(spzf))
print ("peak barycenter is at {}".format(mbary))
print("For an error of {:.3f} ppm".format(ppm(theo, mbary)))
Now we can simply build a function doing all this at once :
import glob
def read_fticr(folder):
"""
load and process the data Solarix Apex FTICR file found in folder
uses the calibration from the parameter file
eg:
spectrum,axis = read_fticr('FTICR/Files/bruker ubiquitin file/ESI_pos_Ubiquitin_000006.d')
"""
# find and load parameters
L = glob.glob(op.join(folder,"*","apexAcquisition.method"))
if len(L)>1:
raise Exception( "You have more than 1 apexAcquisition.method file in the %s folder, using the first one"%folder )
elif len(L) == 0:
raise Exception( "You don't have any apexAcquisition.method file in the %s folder, please double check the path"%folder )
param = read_param(L[0])
# load data
n = int(param['TD'])
fidzf = np.zeros(2*n)
with open( op.join(BASE, 'fid'), 'rb') as F: # 'with' a better way of reading a file
tbuf = F.read(4*int(param['TD']))
fidzf[:n] = np.array(array.array('i',tbuf)) # [:] is less memory intensive
# process
fidzf[:n] *= np.hamming(n)
spectrum = abs( rfft( fidzf ) )
# calibrate
sw = float(param['SW_h_Broadband'])
ml1 = float(param['ML1'])
ml2 = float(param['ML2'])
faxis = np.linspace(0, sw, len(spectrum)) # the freq axis from 0 to sw
mzaxis = ml1/(ml2+faxis) # and the mzaxis
return spectrum, mzaxis
spectrum,axis = read_fticr('files/FTICR-Files/ESI_pos_Ubiquitin_000006.d')
# %matplotlib
plt.plot(axis, spectrum)
plt.xlabel("$m/z$")
plt.xlim(200,2500)
plt.plot(axis, spectrum)
plt.xlabel("$m/z$")
plt.xlim(650,1300)
plt.title("Zooming in")
plt.plot(axis, spectrum)
plt.xlabel("$m/z$")
plt.xlim(855,865)
plt.title("Zooming further");
SPIKE is a complete software suite we're building to do this, and much more
It is distributed here : https://bitbucket.org/delsuc/spike
However, the program is still in development, stay tuned !
import sys
sys.path.append('/Users/mad/') # adapt this to your own set-up
import spike
from spike.File import Apex
File = 'files/FTICR-Files/080617-insulin_2M_MS_000001.d'
dd = Apex.Import_1D(File)
dd.unit = "sec"
dd.display()
dd.hamming().zf(2).rfft().modulus()
dd.unit = "m/z"
dd.display(zoom=(500,2000))
dd.pp(threshold=1E7)
print ("detected %d peaks"%len(dd.peaks))
dd.display(zoom=(1150,1180))
dd.display_peaks(zoom=(856.5, 858.5))
p = dd.peaks[40]
p.report()
p.label = "Peak 40"
p.report() # in index
p.report(f=dd.axis1.itomz) # provides in m/z
# string is :
# id, label, m/z, intensity, width (0 if not determined)
dd.centroid()
for p in dd.peaks:
p.label = "%.5f"%(dd.axis1.itomz(p.pos))
# this switches to a different display mode
dd.display(zoom=(1155,1160))
dd.display_peaks(zoom=(1155,1160)) #, peak_label=True)