Multiple gaussian fit issue

Question:

I have this code that aims to fit the data in here https://drive.google.com/file/d/1uBrHxuftALiQTcTeBl-s6AHGiC8DGBWV/view?usp=sharing.

Where the function read_file is used to stract the information in a proper way. I do not what i am doing wrong that the gaussian fit is not right as you can see in the image (Gaussian fit) , and the standar deviation for all the fitted gaussian is the same or it varies very litte (1e-09). Can anyone help me?

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import os



file_path='/content/drive/MyDrive/ProjetoXRF_Manuel/April2024/Amostra4.mca' # You need to change this to your directory

def read_file(file_path):
    # Abrir el archivo
    with open(file_path, 'r', encoding='latin1') as f:
        data = []
        live_time = None  # Inicializar variable live_time
        real_time = None  # Inicializar variable real_time
        for line in f:
            if 'LIVE_TIME' in line:
                live_time = line.strip()  # Almacenar la línea que contiene LIVE_TIME
            elif 'REAL_TIME' in line:
                real_time = line.strip()  # Almacenar la línea que contiene REAL_TIME
            elif all(char.isdigit() or char in ".-+e" for char in line.strip()):
                # Analizar y almacenar datos numéricos como flotante único
                row = [float(x) for x in line.split()]
                data.extend(row)

    # Convertir 'data' a un arreglo de numpy
    return np.array(data), live_time, real_time



data, live_time, real_time = read_numeric_data(file_path)
a = -0.0076702251954372525
b = 0.03952691936704189
Data=data

peaks, _ = find_peaks(Data[:, 0] / maxim, height=0.1)

# Get peak values and indices
peak_values = Data[peaks] / maxim
peak_indices = peaks

# Definición de la función gaussiana
def gaus(x, a, x0, sigma):
    return a * np.exp(-(x - x0) ** 2 / (2 * sigma ** 2))
limite=(10-a)/b
print(limite)
# Datos
x_values = a + b * np.linspace(0, 270, 270)
y_values = np.squeeze(Data[:270]) / np.max(Data[:270])

# Parámetros iniciales
sigma = 2 # initial value
standar=[]
# Ajuste de curvas para diferentes conjuntos de datos
for i in range(len(peak_values)):
    mean = a + b * peak_indices[i] 
    amplitude = peak_values[i][0]
    
    # Ajustar la curva para cada conjunto de datos
    popt, pcov = curve_fit(gaus, x_values, y_values, p0=[amplitude, mean, 1])
    
    # Visualización del ajuste
    m,n,p= popt
    plt.plot(x_values, gaus(x_values,amplitude,mean,p), label='fit')

    print(mean, amplitude,"Desviación standar",p)

    standar.append(p)

plt.scatter(x_values, y_values, label='Data')
plt.xlim(4, 10)
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.grid(True)
plt.show()



Asked By: Manuel Borra

||

Answers:

The two main issues were:

  • The code fits just a single gaussian rather than a sum of gaussians. You need to fit (amplitude, mean, sigma) parameters for each gaussian, and add the resulting gaussians together.
  • The fitting algorithm seems quite sensitive to the initialisation of sigma, so I’ve used a better starting value.

enter image description here

The modified code:

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.signal import find_peaks

file_path='Amostra4.mca'

def read_file(file_path):
    # Abrir el archivo
    with open(file_path, 'r', encoding='latin1') as f:
        data = []
        live_time = None  # Inicializar variable live_time
        real_time = None  # Inicializar variable real_time
        for line in f:
            if 'LIVE_TIME' in line:
                live_time = line.strip()  # Almacenar la línea que contiene LIVE_TIME
            elif 'REAL_TIME' in line:
                real_time = line.strip()  # Almacenar la línea que contiene REAL_TIME
            elif all(char.isdigit() or char in ".-+e" for char in line.strip()):
                # Analizar y almacenar datos numéricos como flotante único
                row = [float(x) for x in line.split()]
                data.extend(row)

    # Convertir 'data' a un arreglo de numpy
    return np.array(data), live_time, real_time

def multi_gaussian(x, *params):
    eps = 1e-10
    y = np.zeros_like(x)
    for i in range(0, len(params), 3):
        amplitude, mean, sigma = params[i:i+3]
        y += amplitude * np.exp(-(x - mean)**2 / (2 * sigma**2 + eps))
    return y

#Load and prepare data
data, live_time, real_time = read_file(file_path)

a = -0.0076702251954372525
b = 0.03952691936704189
y_start = 150
y_end = 270
data_y = data[y_start:y_end] / data[y_start:y_end].max()
data_x = a + b * np.linspace(y_start, y_end, num=len(data_y))

#Some initial estimates that could help with curve_fit() convergence
peak_indices, _ = find_peaks(data_y / data_y.max(), height=0.1)
peak_amplitudes = data_y[peak_indices]
peak_means = data_x[peak_indices]
peak_sigmas = [0.1] * len(peak_indices)

params_init = list(zip(peak_amplitudes, peak_means, peak_sigmas))
params_init = np.concatenate(params_init)

#Fit
params, params_cov = curve_fit(multi_gaussian, data_x, data_y, p0=params_init)

#Get a high-resolution interpolation of the fit
x_fine = np.linspace(data_x.min(), data_x.max(), num=500)
y_fit = multi_gaussian(x_fine, *params)

#
# Plot
#
fig, ax = plt.subplots(figsize=(8, 2))

#Data
ax.scatter(data_x, data_y, label='data', color='black', marker='o', s=20)

#Fitted curve
ax.plot(x_fine, y_fit, label='fit', linewidth=1.5, color='tab:purple')

#Annotate the estimated locations
[ax.axvline(mean, color='darkslategray', linewidth=1, linestyle=':') for mean in params[1::3]]

#Formatting
ax.set(xlabel='X', ylabel='Y', title='Data and fit')
ax.spines[['right', 'top']].set_visible(False)
ax.legend()
Answered By: Muhammed Yunus
Categories: questions Tags: , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.