Tutorial¶
- Installing Protomix
- Import packages
- Downloading the dataset from Zenodo link
- Unzipping the downloaded dataset
- Extract Acquisition parameters
- Extract Free Induction Decays (FID)
- Remove group delay
- Solvent residuals estimation
- Apodization
- Zero filling:
- Fourier Transformation
- Phase correction
- Internal referencing
- Baseline correction
- Region removal
- Negative values zeroing
- Icoshift class
- Peak alignment dataframe
- Window selection
- Binning
- Normalization
Caution
The full code is available on Colab: link
Installing Protomix¶
!pip install Protomix
Import packages¶
import numpy as np
import pandas as pd
import time

import protomix as px

import seaborn as sns
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.io as pio
from sklearn.decomposition import PCA

# Use the white Plotly theme for every figure created below.
pio.templates.default = "plotly_white"
Downloading the dataset from Zenodo link¶
!wget -O NMRDataset.zip https://zenodo.org/record/13467227/files/NMRDataset.zip?download=1
Unzipping the downloaded dataset¶
!unzip NMRDataset.zip
main_directory = r'/content/spectra'
Caution
main_directory is the directory where the Bruker data are located. Please keep the original Bruker directory structure.
Extract Acquisition parameters¶
acqus_df = px.extract_params(root_directory=main_directory)
Extract Free Induction Decays (FID)¶
fids_df = px.extract_fids(root_directory=main_directory, acqus_df=acqus_df)
You can plot raw FID using px.plot:
# Row position of the spectrum to display; it is reused by every plot below.
spectrIndex = 0
px.plot(x=fids_df.columns, y=fids_df.iloc[spectrIndex].apply(np.real), title='Free induction decay')
Remove group delay¶
grpd_df = px.group_delay_removal(fid_df=fids_df, acqus_df=acqus_df)
Note
Plot the FID after group delay removal. Zoom in on this figure and on the previous one to see the difference.
px.plot(x=grpd_df.columns, y=grpd_df.iloc[spectrIndex].apply(np.real), title='FID after group delay removal', xlabel='Time (s)', ylabel='Intensity (a.u.)')
Solvent residuals estimation¶
fids, solvents = px.solvent_residuals_removal(fid_df=grpd_df, returnSolvent=True)
Note
Plot FID before correction, solvent residuals, and FID after correction
fig = make_subplots(rows=1, cols=2, shared_yaxes=True)
fig.add_trace(go.Scatter(x=grpd_df.columns, y=grpd_df.iloc[spectrIndex].apply(np.real), line=dict(color='blue', width=0.5), name='FID Before Correction'), row=1, col=1)
fig.add_trace(go.Scatter(x=solvents.columns, y=solvents.iloc[spectrIndex].apply(np.real), line=dict(color='red', width=0.8), name='Estimated Solvent'), row=1, col=1)
fig.add_trace(go.Scatter(x=fids.columns, y=fids.iloc[spectrIndex].apply(np.real), line=dict(color='blue', width=0.5), name='FID After Correction'), row=1, col=2)
fig.update_layout(height=600, width=1000, title_text="Solvent Residuals Removal")
Apodization¶
apod_df = px.apodization(fids, LB=5, apodization_type='exponential')
px.plot(x=apod_df.columns, y=apod_df.iloc[spectrIndex].apply(np.real), title='Fid after apodization')
Zero filling:¶
Note
This function adds points at the end of the FID to enhance the resolution. Compare the time domain with the previous plot.
zf_df = px.zero_filling(fid_df=apod_df, acqus_df=acqus_df, target_points=10000)
px.plot(x=zf_df.columns, y=zf_df.iloc[spectrIndex].apply(np.real), title='FID after zero filling')
Fourier Transformation¶
# Transform the zero-filled FIDs into spectra.
spectra_df = px.fourier_transform(fid_df=zf_df, acqus_df=acqus_df)
# The data are no longer an FID at this point, so the title names a spectrum.
px.plot(x=spectra_df.columns, y=spectra_df.iloc[spectrIndex].apply(np.real), title='NMR Spectrum after Fourier transform', xlabel='Chemical Shift (ppm)')
Phase correction¶
ph_df = px.phase_correction(spectra_df=spectra_df)
px.plot(ph_df.columns, ph_df.iloc[spectrIndex].apply(np.real), title='NMR Spectrum after phase correction', xlabel='Chemical Shift (ppm)')
Internal referencing¶
ir_df = px.internal_referencing(spectra_df=ph_df)
Note
To plot the referenced spectrum and annotate the reference peak at 0
# Draw the selected spectrum after internal referencing.
fig = go.Figure()
fig.add_trace(go.Scatter(x=ir_df.columns.to_numpy(), y=ir_df.iloc[spectrIndex].values, mode='lines', line=dict(width=1), name='Signal'))
# Label the reference peak at x=0; a plain string is enough because the text has no placeholders.
fig.add_annotation(x=0, y=100000000, text='Reference peak', showarrow=True, arrowhead=1, ax=0, ay=-30)
fig.update_layout(width=800, height=600)
fig.show()
Baseline correction¶
bc_df = px.baseline_correction(spectra_df=ir_df, method='airpls')
px.plot(bc_df.columns, bc_df.iloc[spectrIndex].apply(np.real), title='NMR Spectrum after baseline correction', xlabel='Chemical shift')
Region removal¶
rv_df = px.region_removal(spectra_df=bc_df, range=(4.5, 6.1))
px.plot(rv_df.columns, rv_df.iloc[spectrIndex].apply(np.real), title='NMR Spectrum after region removal', xlabel='Chemical shift')
Negative values zeroing¶
# Continue from the region-removed spectra so the previous step is not discarded.
nvz_df = px.negative_values_zeroing(spectra_df=rv_df)
px.plot(nvz_df.columns, nvz_df.iloc[spectrIndex].apply(np.real), title='NMR Spectrum after negative values zeroing', xlabel='Chemical shift')
Icoshift class¶
Note
We use Icoshift for peak alignment. More details can be found here: https://github.com/sekro/pyicoshift
icoshift = px.Icoshift()
icoshift.signals = nvz_df.values
icoshift.signal_names = list(nvz_df.index.values)
icoshift.inter = ('n_intervals', 100)
icoshift.target = 'median'
icoshift.global_pre_align = True
icoshift.max_shift = 'best'
icoshift.run()
Peak alignment dataframe¶
pa_df = pd.DataFrame(icoshift.result, index=nvz_df.index, columns=nvz_df.columns)
px.plot(pa_df.columns, pa_df.iloc[spectrIndex].apply(np.real), title='NMR Spectrum after peak alignment', xlabel='Chemical Shift (ppm)')
Note
To plot the first five spectra after peak alignment
traces = [go.Scatter(x=pa_df.columns.values, y=icoshift.result[i, :]) for i in range(5)]
layout = go.Layout(title='Urine Dataset after peak alignment')
fig = go.Figure(data=traces, layout=layout)
pio.show(fig)
Window selection¶
ws_df = px.window_selection(spectra_df=pa_df)
px.plot(ws_df.columns, ws_df.iloc[spectrIndex].apply(np.real), title='NMR Spectrum', xlabel='Chemical shift (ppm)')
Binning¶
bin_df = px.binning(spectra_df=ws_df, bin_size=0.1, method='rectangular')
px.plot(bin_df.columns, bin_df.iloc[spectrIndex], title='NMR Spectrum', xlabel='Chemical shift (ppm)')
Normalization¶
norm_df = px.normalize(spectra_df=bin_df, method='TotalArea')
px.plot(norm_df.columns, norm_df.iloc[spectrIndex], title='NMR Spectrum after normalization', xlabel='Chemical shift (ppm)')
Note
To plot the first five spectra after normalization
# One trace per spectrum for the first five rows of the normalized data.
traces = [go.Scatter(x=norm_df.columns.values, y=norm_df.to_numpy()[i, :]) for i in range(5)]

layout = go.Layout(title='Normalization')
fig = go.Figure(data=traces, layout=layout)
pio.show(fig)