Tutorial

Caution

The full code is available on Google Colab: link

Installing Protomix

!pip install Protomix

Import packages

 1import numpy as np
 2import pandas as pd
 3import time
 4
 5import protomix as px
 6
 7import seaborn as sns
 8import matplotlib.pyplot as plt
 9from plotly.subplots import make_subplots
10import plotly.graph_objects as go
11import plotly.io as pio
12from sklearn.decomposition import PCA
13
14pio.templates.default = "plotly_white"

Unzipping the downloaded dataset

!unzip NMRDataset.zip
# Root directory of the Bruker data; it is handed to the extraction steps as
# their `root_directory` argument.
main_directory: str = r'/content/spectra'

Caution

This is the directory where the Bruker data are located. Please keep the original Bruker directory structure.

Extract Acquisition parameters

# Collect the acquisition parameters for the data found under main_directory.
acqus_df = px.extract_params(
    root_directory=main_directory,
)

Extract Free Induction Decays (FID)

# Read the free induction decays from the same directory; the previously
# extracted acquisition parameters are passed along.
fids_df = px.extract_fids(
    acqus_df=acqus_df,
    root_directory=main_directory,
)

You can plot raw FID using px.plot:

# Row position of the spectrum displayed by the plotting snippets of this
# tutorial (0 selects the first row). It is defined here because this is the
# first snippet that reads it.
spectrIndex = 0

# Real part of the selected raw FID, plotted against the column labels of fids_df.
px.plot(
    x=fids_df.columns,
    y=fids_df.iloc[spectrIndex].apply(np.real),
    title='Free induction decay',
)

Remove group delay

# Group delay removal on the raw FIDs, using the acquisition parameters.
grpd_df = px.group_delay_removal(
    acqus_df=acqus_df,
    fid_df=fids_df,
)

Note

Plot the FID after group delay removal. Zoom in on this figure and the previous one to see the difference.

# Real part of the selected FID once the group delay has been removed.
delay_corrected_real = grpd_df.iloc[spectrIndex].apply(np.real)
px.plot(
    x=grpd_df.columns,
    y=delay_corrected_real,
    title='FID after group delay removal',
    xlabel='Time (s)',
    ylabel='Intensity (a.u.)',
)

Solvent residuals estimation

# With returnSolvent=True the call is unpacked into two results: the corrected
# FIDs and the estimated solvent residuals.
fids, solvents = px.solvent_residuals_removal(
    returnSolvent=True,
    fid_df=grpd_df,
)

Note

Plot FID before correction, solvent residuals, and FID after correction

# Two side-by-side panels sharing the y axis: left shows the FID before
# correction together with the estimated solvent, right shows the corrected FID.
fig = make_subplots(rows=1, cols=2, shared_yaxes=True)

# Each entry: (data frame, line colour, line width, legend name, subplot column)
trace_specs = [
    (grpd_df, 'blue', 0.5, 'FID Before Correction', 1),
    (solvents, 'red', 0.8, 'Estimated Solvent', 1),
    (fids, 'blue', 0.5, 'FID After Correction', 2),
]
for frame, colour, line_width, legend_name, panel in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=frame.columns,
            y=frame.iloc[spectrIndex].apply(np.real),
            line=dict(color=colour, width=line_width),
            name=legend_name,
        ),
        row=1,
        col=panel,
    )

fig.update_layout(height=600, width=1000, title_text="Solvent Residuals Removal")

Apodization

# Exponential apodization of the solvent-corrected FIDs with LB=5.
apod_df = px.apodization(fids, apodization_type='exponential', LB=5)

apodized_real = apod_df.iloc[spectrIndex].apply(np.real)
px.plot(x=apod_df.columns, y=apodized_real, title='Fid after apodization')

Zero filling:

Note

This function adds points to the end of the FID to enhance the resolution. Compare the time domain with that of the previous plot.

# Zero filling of the apodized FIDs with target_points set to 10000.
zf_df = px.zero_filling(
    fid_df=apod_df,
    acqus_df=acqus_df,
    target_points=10_000,
)

zero_filled_real = zf_df.iloc[spectrIndex].apply(np.real)
px.plot(x=zf_df.columns, y=zero_filled_real, title='FID after zero filling')

Fourier Transformation

# Fourier transform of the zero-filled FIDs, using the acquisition parameters.
spectra_df = px.fourier_transform(fid_df=zf_df, acqus_df=acqus_df)

# The transformed data is a spectrum plotted on a chemical-shift axis, so the
# title says "NMR Spectrum" rather than "FID".
px.plot(
    x=spectra_df.columns,
    y=spectra_df.iloc[spectrIndex].apply(np.real),
    title='NMR Spectrum after Fourier transform',
    xlabel='Chemical Shift (ppm)',
)

Phase correction

# Phase correction of the Fourier-transformed spectra.
ph_df = px.phase_correction(spectra_df=spectra_df)

phased_real = ph_df.iloc[spectrIndex].apply(np.real)
px.plot(
    ph_df.columns,
    phased_real,
    title='NMR Spectrum after phase correction',
    xlabel='Chemical Shift (ppm)',
)

Internal referencing

# Internal referencing applied to the phase-corrected spectra.
ir_df = px.internal_referencing(
    spectra_df=ph_df,
)

Note

To plot the spectrum with the original ppm values and the shifted spectrum values:

# Line trace of the selected spectrum after internal referencing.
signal_trace = go.Scatter(
    x=ir_df.columns.to_numpy(),
    y=ir_df.iloc[spectrIndex].values,
    mode='lines',
    line=dict(width=1),
    name='Signal',
)

fig = go.Figure()
fig.add_trace(signal_trace)

# Arrow annotation labelled 'Reference peak', anchored at x=0 and y=100000000.
fig.add_annotation(
    x=0,
    y=100_000_000,
    text='Reference peak',
    showarrow=True,
    arrowhead=1,
    ax=0,
    ay=-30,
)
fig.update_layout(width=800, height=600)

fig.show()

Baseline correction

# Baseline correction of the referenced spectra with the 'airpls' method.
bc_df = px.baseline_correction(method='airpls', spectra_df=ir_df)

baseline_corrected_real = bc_df.iloc[spectrIndex].apply(np.real)
px.plot(
    bc_df.columns,
    baseline_corrected_real,
    title='NMR Spectrum after baseline correction',
    xlabel='Chemical shift',
)

Region removal

# Region removal on the baseline-corrected spectra; the (4.5, 6.1) pair is
# passed as the `range` argument.
excluded_range = (4.5, 6.1)
rv_df = px.region_removal(spectra_df=bc_df, range=excluded_range)

px.plot(
    rv_df.columns,
    rv_df.iloc[spectrIndex].apply(np.real),
    title='NMR Spectrum after region removal',
    xlabel='Chemical shift',
)

Negative values zeroing

# Start from the region-removed spectra (rv_df) so that the region removal
# step is carried through the rest of the pipeline; starting again from bc_df
# would silently discard it.
nvz_df = px.negative_values_zeroing(spectra_df=rv_df)

px.plot(
    nvz_df.columns,
    nvz_df.iloc[spectrIndex].apply(np.real),
    title='NMR Spectrum after negative values zeroing',
    xlabel='Chemical shift',
)

Icoshift class

Note

We use Icoshift for peak alignment. More details can be found here: https://github.com/sekro/pyicoshift

# Create the aligner object and configure it through its attributes before
# running it. The assignment order is kept exactly as is.
icoshift = px.Icoshift()

# Input: the values of nvz_df, with its index labels as signal names.
icoshift.signals = nvz_df.values
icoshift.signal_names = list(nvz_df.index.values)
# Interval setting: the ('n_intervals', 100) pair
# (presumably 100 alignment intervals — confirm in the pyicoshift docs).
icoshift.inter = ('n_intervals', 100)
# Alignment target, global pre-alignment flag and maximum shift setting.
icoshift.target = 'median'
icoshift.global_pre_align = True
icoshift.max_shift = 'best'

# Run the alignment; the outcome is read from icoshift.result afterwards.
icoshift.run()

Peak alignment dataframe

# Wrap the alignment result in a DataFrame that reuses the row and column
# labels of nvz_df.
pa_df = pd.DataFrame(
    icoshift.result,
    columns=nvz_df.columns,
    index=nvz_df.index,
)

aligned_real = pa_df.iloc[spectrIndex].apply(np.real)
px.plot(
    pa_df.columns,
    aligned_real,
    title='NMR Spectrum after peak alignment',
    xlabel='Chemical Shift (ppm)',
)

Note

To plot the first five spectra after peak alignment

# One trace per row for the first five rows of the alignment result.
traces = []
for row in range(5):
    traces.append(go.Scatter(x=pa_df.columns.values, y=icoshift.result[row, :]))

layout = go.Layout(title='Urine Dataset after peak alignment')
fig = go.Figure(layout=layout, data=traces)
pio.show(fig)

Window selection

# Window selection on the aligned spectra, using the function's defaults.
ws_df = px.window_selection(spectra_df=pa_df)

windowed_real = ws_df.iloc[spectrIndex].apply(np.real)
px.plot(
    ws_df.columns,
    windowed_real,
    title='NMR Spectrum',
    xlabel='Chemical shift (ppm)',
)

Binning

# Binning of the selected window with bin_size=0.1 and the 'rectangular' method.
bin_df = px.binning(
    spectra_df=ws_df,
    method='rectangular',
    bin_size=0.1,
)

binned_row = bin_df.iloc[spectrIndex]
px.plot(bin_df.columns, binned_row, title='NMR Spectrum', xlabel='Chemical shift (ppm)')

Normalization

# Normalization of the binned spectra with the 'TotalArea' method.
norm_df = px.normalize(method='TotalArea', spectra_df=bin_df)

normalized_row = norm_df.iloc[spectrIndex]
px.plot(
    norm_df.columns,
    normalized_row,
    title='NMR Spectrum after normalization',
    xlabel='Chemical shift (ppm)',
)

Note

To plot the first five spectra after normalization

# One trace per row for the first five rows of the normalized data. The array
# and the x values are taken once instead of once per trace.
norm_values = norm_df.to_numpy()
x_values = norm_df.columns.values
traces = [go.Scatter(x=x_values, y=norm_values[i, :]) for i in range(5)]

layout = go.Layout(title='Normalization')
fig = go.Figure(data=traces, layout=layout)
pio.show(fig)