我想对直方图数据拟合对数正态分布。我参考了 Stack Exchange 上其他问题的示例,但没有成功,因为我的图使用了断轴(broken axis)。目前我已经在图中加入了断轴,避免了坐标轴上刻度数字的重叠,去掉了重复坐标轴上的刻度数字,并缩小了第二个子图的宽度,但仍然无法完成对数正态分布的拟合。应该怎样对这个数据集拟合对数正态分布?
代码:
#amostra 17B (menor intervalo)
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import lognorm
import matplotlib.ticker as tkr
import scipy, pylab
import locale
import matplotlib.gridspec as gridspec
from scipy.stats import lognorm
locale.setlocale(locale.LC_NUMERIC, "de_DE")
plt.rcParams['axes.formatter.use_locale'] = True
frequencia_relativa=[0.000, 0.000, 0.038, 0.097, 0.091, 0.118, 0.070, 0.124, 0.097, 0.059, 0.059, 0.048, 0.054, 0.043,
0.032, 0.005, 0.027, 0.016, 0.005, 0.000, 0.005, 0.000, 0.005, 0.000, 0.000, 0.000, 0.000, 0.000,
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.005, 0.000, 0.000]
x=[0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 1.00, 1.10, 1.20, 1.30, 1.40,
1.50, 1.60, 1.70, 1.80, 1.90, 2.00, 2.10, 2.20, 2.30, 2.40, 2.50, 2.60, 2.70, 2.80,
2.90, 3.00, 3.10, 3.20, 3.30, 3.40, 3.50, 3.60, 3.70, 3.80, 3.90, 4.00, 4.10, 4.20,
4.30, 4.40, 4.50, 4.60, 4.70, 4.80, 4.90, 5.00, 5.10, 5.20, 5.30, 5.40, 5.50, 5.60,
5.70, 5.80, 5.90, 6.00, 6.10, 6.20, 6.30, 6.40, 6.50, 6.60, 6.70, 6.80, 6.90, 7.00,
7.10, 7.20, 7.30, 7.40, 7.50, 7.60, 7.70, 7.80, 7.90, 8.00]
# Broken-axis bar chart: two side-by-side panels draw the same histogram,
# the left one limited to x in [0, 2.5] and the right one to x in [7.0, 8.0].
plt.rcParams["figure.figsize"] = [20,8]
f, (ax,ax2) = plt.subplots(1,2, sharex=True, sharey=True, facecolor='w')
# Frameless axes spanning the whole figure; `axes` is not referenced again in this listing.
axes = f.add_subplot(111, frameon=False)
# NOTE(review): ax/ax2 are rebound to new GridSpec subplots just below, so
# these two calls act on the axes returned by plt.subplots(), not on the
# panels that are drawn into afterwards -- confirm this is intended.
ax.spines['top'].set_color('none')
ax2.spines['top'].set_color('none')
# The left panel gets three times the width of the right panel.
gs = gridspec.GridSpec(1,2,width_ratios=[3,1])
ax = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1])
# Ticks on the outer sides only: left/bottom for ax, right/bottom for ax2.
ax.yaxis.tick_left()
ax.xaxis.tick_bottom()
ax2.xaxis.tick_bottom()
# NOTE(review): 'off' is a non-empty string rather than a boolean;
# labeltop=False is passed again further down in this listing.
ax.tick_params(labeltop='off') # don't put tick labels at the top
ax2.yaxis.tick_right()
# Same bars in both panels; each panel's x-limits (set below) select the
# visible part. align='edge' makes every bar start at its x value.
ax.bar(x, height=frequencia_relativa, alpha=0.5, width=0.1, align='edge', edgecolor='black', hatch="///")
ax2.bar(x, height=frequencia_relativa, alpha=0.5, width=0.1, align='edge', edgecolor='black', hatch="///")
ax.tick_params(axis = 'both', which = 'major', labelsize = 18)
ax.tick_params(axis = 'both', which = 'minor', labelsize = 18)
ax2.tick_params(axis = 'both', which = 'major', labelsize = 18)
ax2.tick_params(axis = 'both', which = 'minor', labelsize = 18)
# Right panel x ticks at 7.0, 7.5 and 8.0.
ax2.xaxis.set_ticks(np.arange(7.0, 8.5, 0.5))
# NOTE(review): this formatter is replaced later by set_major_formatter(x_format).
ax2.xaxis.set_major_formatter(tkr.FormatStrFormatter('%0.1f'))
plt.subplots_adjust(wspace=0.04)
ax.set_xlim(0,2.5)
ax.set_ylim(0,0.14)
ax2.set_xlim(7.0,8.0)
def func(x, pos):
    """Format a tick value with a decimal comma instead of a decimal point.

    The (value, position) signature is the one expected by
    ``matplotlib.ticker.FuncFormatter``, which this function is wrapped in.

    Args:
        x: Tick value; it is converted with ``str``.
        pos: Tick position; unused.

    Returns:
        ``str(x)`` with its first ``'.'`` replaced by ``','``. If the text
        contains no dot (e.g. an integer tick, or ``1e-05``) it is returned
        unchanged instead of raising ``ValueError``.
    """
    text = str(x)
    whole, dot, fraction = text.partition('.')
    if not dot:
        # Nothing to replace; the original s.index('.') raised here.
        return text
    return whole + ',' + fraction
# Use the decimal-comma formatter on the x axis of both panels.
x_format = tkr.FuncFormatter(func)
ax.xaxis.set_major_formatter(x_format)
ax2.xaxis.set_major_formatter(x_format)
# hide the spines between ax and ax2
ax.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)
# This looks pretty good, and was fairly painless, but you can get that
# cut-out diagonal lines look with just a bit more work. The important
# thing to know here is that in axes coordinates, which are always
# between 0-1, spine endpoints are at these locations (0,0), (0,1),
# (1,0), and (1,1). Thus, we just need to put the diagonals in the
# appropriate corners of each of our axes, and so long as we use the
# right transform and disable clipping.
d = .015 # how big to make the diagonal lines in axes coordinates
# arguments to pass plot, just so we don't keep repeating them
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
# Break marks at the right edge of the left panel (bottom, then top); the
# x half-width is d/3 here versus d on the right panel.
ax.plot((1-d/3,1+d/3), (-d,+d), **kwargs)
ax.plot((1-d/3,1+d/3),(1-d,1+d), **kwargs)
kwargs.update(transform=ax2.transAxes) # switch to the right-hand axes
# Break marks at the left edge of the right panel (top, then bottom).
ax2.plot((-d,+d), (1-d,1+d), **kwargs)
ax2.plot((-d,+d), (-d,+d), **kwargs)
ax2.tick_params(labelright=False)
ax.tick_params(labeltop=False)
# Extra padding between the x axis and its tick labels.
ax.tick_params(axis='x', which='major', pad=15)
ax2.tick_params(axis='x', which='major', pad=15)
ax2.set_yticks([])
# Shared axis titles placed in figure coordinates, slightly outside [0, 1].
# NOTE(review): '\m' is not a recognised escape in a non-raw string literal;
# Python keeps the backslash but warns about it -- consider a raw string.
f.text(0.5, -0.04, 'Tamanho lateral do triângulo ($\mu m$)', ha='center', fontsize=22)
f.text(-0.02, 0.5, 'Frequência relativa', va='center', rotation='vertical', fontsize=22)
#ax.set_xlabel('Tamanho lateral do triângulo ($\mu m$)', fontsize=22)
#ax.set_ylabel('Frequência relativa', fontsize=22)
#x_axis = np.arange(0, 29, 0.001)
#ax.plot(x_axis, norm.pdf(x_axis,2.232,1.888), linewidth=3)
f.tight_layout()
plt.show()
#plt.savefig('output.png', dpi=500, bbox_inches='tight')
字符串
的数据
尝试使用curve_fit:
#amostra 17B (menor intervalo)
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import lognorm
import matplotlib.ticker as tkr
import scipy, pylab
import locale
import matplotlib.gridspec as gridspec
from scipy.stats import lognorm
locale.setlocale(locale.LC_NUMERIC, "de_DE")
plt.rcParams['axes.formatter.use_locale'] = True
from scipy.optimize import *
frequencia_relativa=[0.000, 0.000, 0.038, 0.097, 0.091, 0.118, 0.070, 0.124, 0.097, 0.059, 0.059, 0.048, 0.054, 0.043,
0.032, 0.005, 0.027, 0.016, 0.005, 0.000, 0.005, 0.000, 0.005, 0.000, 0.000, 0.000, 0.000, 0.000,
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.005, 0.000, 0.000]
x=[0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 1.00, 1.10, 1.20, 1.30, 1.40,
1.50, 1.60, 1.70, 1.80, 1.90, 2.00, 2.10, 2.20, 2.30, 2.40, 2.50, 2.60, 2.70, 2.80,
2.90, 3.00, 3.10, 3.20, 3.30, 3.40, 3.50, 3.60, 3.70, 3.80, 3.90, 4.00, 4.10, 4.20,
4.30, 4.40, 4.50, 4.60, 4.70, 4.80, 4.90, 5.00, 5.10, 5.20, 5.30, 5.40, 5.50, 5.60,
5.70, 5.80, 5.90, 6.00, 6.10, 6.20, 6.30, 6.40, 6.50, 6.60, 6.70, 6.80, 6.90, 7.00,
7.10, 7.20, 7.30, 7.40, 7.50, 7.60, 7.70, 7.80, 7.90, 8.00]
# Figure with two side-by-side panels for the broken-axis layout.
plt.rcParams["figure.figsize"] = [20,8]
f, (ax,ax2) = plt.subplots(1,2, sharex=True, sharey=True, facecolor='w')
# Frameless axes spanning the whole figure; `axes` is not referenced again in this listing.
axes = f.add_subplot(111, frameon=False)
# NOTE(review): ax/ax2 are rebound to new GridSpec subplots just below, so
# these two calls act on the axes returned by plt.subplots() -- confirm intent.
ax.spines['top'].set_color('none')
ax2.spines['top'].set_color('none')
# The left panel gets three times the width of the right panel.
gs = gridspec.GridSpec(1,2,width_ratios=[3,1])
ax = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1])
def lognormal_pdf(x, mu, sigma):
    """Evaluate the two-parameter log-normal probability density.

    Computes ``1 / (sqrt(2*pi) * sigma * x) * exp(-(ln x - mu)**2 / (2*sigma**2))``.

    Args:
        x: Point(s) at which to evaluate; scalar or array-like.
        mu: Mean of ``ln x``.
        sigma: Standard deviation of ``ln x``.

    Returns:
        The density at ``x`` as a NumPy scalar or array.
    """
    x = np.asarray(x, dtype=float)
    return 1/(np.sqrt(2*np.pi)*sigma*x)*np.exp(-((np.log(x)-mu)**2)/(2*sigma**2))


# The model function used to be called `f`, which rebound the name of the
# Figure returned by plt.subplots() and made the later f.tight_layout() fail.
# NOTE(review): the model integrates to 1, whereas frequencia_relativa holds
# per-bin relative frequencies -- presumably the data must be rescaled to a
# density before a good fit can be expected; confirm.
params, extras = curve_fit(lognormal_pdf, x, frequencia_relativa)
# plt.plot draws on the current axes.
plt.plot(x, lognormal_pdf(x, params[0], params[1]))
print("mu=%g, sigma=%g" % (params[0], params[1]))
plt.subplots_adjust(wspace=0.04)
# hide the spines between ax and ax2
ax.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)
d = .015 # how big to make the diagonal lines in axes coordinates
# arguments to pass plot, just so we don't keep repeating them
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
# Break marks at the right edge of the left panel (bottom, then top).
ax.plot((1-d/3,1+d/3), (-d,+d), **kwargs)
ax.plot((1-d/3,1+d/3),(1-d,1+d), **kwargs)
kwargs.update(transform=ax2.transAxes) # switch to the right-hand axes
# Break marks at the left edge of the right panel (top, then bottom).
ax2.plot((-d,+d), (1-d,1+d), **kwargs)
ax2.plot((-d,+d), (-d,+d), **kwargs)
# NOTE(review): `f` must still be bound to the Figure returned by
# plt.subplots() at this point; rebinding the name `f` anywhere in between
# makes this call fail.
f.tight_layout()
plt.show()
#plt.savefig('output.png', dpi=500, bbox_inches='tight')
型
的
错误代码:
import matplotlib.ticker as tkr
import scipy, pylab
import locale
import matplotlib.gridspec as gridspec
#from scipy.stats import lognorm
locale.setlocale(locale.LC_NUMERIC, "de_DE")
plt.rcParams['axes.formatter.use_locale'] = True
from scipy.optimize import curve_fit
x=np.asarray([0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 1.00, 1.10, 1.20, 1.30, 1.40,
1.50, 1.60, 1.70, 1.80, 1.90, 2.00, 2.10, 2.20, 2.30, 2.40, 2.50, 2.60, 2.70, 2.80,
2.90, 3.00, 3.10, 3.20, 3.30, 3.40, 3.50, 3.60, 3.70, 3.80, 3.90, 4.00, 4.10, 4.20,
4.30, 4.40, 4.50, 4.60, 4.70, 4.80, 4.90, 5.00, 5.10, 5.20, 5.30, 5.40, 5.50, 5.60,
5.70, 5.80, 5.90, 6.00, 6.10, 6.20, 6.30, 6.40, 6.50, 6.60, 6.70, 6.80, 6.90, 7.00,
7.10, 7.20, 7.30, 7.40, 7.50, 7.60, 7.70, 7.80, 7.90, 8.00], dtype=np.float64)
frequencia_relativa=np.asarray([0.000, 0.000, 0.038, 0.097, 0.091, 0.118, 0.070, 0.124, 0.097, 0.059, 0.059, 0.048, 0.054, 0.043,
0.032, 0.005, 0.027, 0.016, 0.005, 0.000, 0.005, 0.000, 0.005, 0.000, 0.000, 0.000, 0.000, 0.000,
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.005, 0.000, 0.000], dtype=np.float64)
# Figure with two side-by-side axes; ax and ax2 are rebound to GridSpec subplots further down.
f, (ax,ax2) = plt.subplots(1,2, sharex=True, sharey=True, facecolor='w')
def fun(y, mu, sigma):
    """Evaluate the two-parameter log-normal probability density.

    Computes ``1 / (sqrt(2*pi) * sigma * y) * exp(-(ln y - mu)**2 / (2*sigma**2))``.

    Args:
        y: Point(s) at which to evaluate; scalar or NumPy array.
        mu: Mean of ``ln y``.
        sigma: Standard deviation of ``ln y``.

    Returns:
        The density at ``y``, with the same shape as ``y``.
    """
    scale = np.sqrt(2.0*np.pi)*sigma*y
    return 1.0/scale*np.exp(-(np.log(y)-mu)**2/(2.0*sigma*sigma))
# Normalise the histogram to unit area, fit the log-normal PDF, then draw
# the broken-axis bar chart.
step = 0.1
# xx is another name for the same array as x (no copy, no shift applied).
xx = x
nrm = np.sum(frequencia_relativa*step) # normalization integral
print(nrm)
frequencia_relativa /= nrm # normalize the frequency histogram (in place)
print(np.sum(frequencia_relativa*step)) # check normalization
# Least-squares fit of fun(y, mu, sigma); params holds the fitted mu and sigma.
params, extras = curve_fit(fun, xx, frequencia_relativa)
print(params[0])
print(params[1])
# NOTE(review): the fitted curve is drawn on the frameless full-figure axes,
# not on ax/ax2, so its scale is independent of the bar panels -- presumably
# it was meant to go on ax/ax2; confirm.
axes = f.add_subplot(111, frameon=False)
axes.plot(x, fun(x, params[0], params[1]), "b-", linewidth=3)
# NOTE(review): ax/ax2 are rebound to new GridSpec subplots just below, so
# these two calls act on the axes returned by plt.subplots() -- confirm intent.
ax.spines['top'].set_color('none')
ax2.spines['top'].set_color('none')
# The left panel gets three times the width of the right panel.
gs = gridspec.GridSpec(1,2,width_ratios=[3,1])
ax = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1])
# Shaded band from x = 0.190 to x = 1.616 over the full panel height.
# NOTE(review): '\p' is not a recognised escape in a non-raw string literal;
# Python keeps the backslash but warns about it -- consider a raw string.
ax.axvspan(0.190, 1.616, label='Média $\pm$ desvio padrão', ymin=0.0, ymax=1.0, alpha=0.2, color='Plum')
ax.yaxis.tick_left()
ax.xaxis.tick_bottom()
ax2.xaxis.tick_bottom()
# NOTE(review): 'off' is a non-empty string rather than a boolean;
# labeltop=False is passed again further down in this listing.
ax.tick_params(labeltop='off') # don't put tick labels at the top
ax2.yaxis.tick_right()
# Same bars in both panels; each panel's x-limits (set below) select the
# visible part. align='edge' makes every bar start at its x value.
ax.bar(xx, height=frequencia_relativa, label='Frequência relativa do tamanho lateral triangular', alpha=0.5, width=0.1, align='edge', edgecolor='black', hatch="///")
ax2.bar(xx, height=frequencia_relativa, alpha=0.5, width=0.1, align='edge', edgecolor='black', hatch="///")
#plt.plot(xx, frequencia_relativa, "ro")
ax.tick_params(axis = 'both', which = 'major', labelsize = 18)
ax.tick_params(axis = 'both', which = 'minor', labelsize = 18)
ax2.tick_params(axis = 'both', which = 'major', labelsize = 18)
ax2.tick_params(axis = 'both', which = 'minor', labelsize = 18)
# Right panel x ticks at 7.0, 7.5 and 8.0.
ax2.xaxis.set_ticks(np.arange(7.0, 8.5, 0.5))
# NOTE(review): this formatter is replaced later by set_major_formatter(x_format).
ax2.xaxis.set_major_formatter(tkr.FormatStrFormatter('%0.1f'))
plt.subplots_adjust(wspace=0.04)
ax.set_xlim(0,2.5)
# Upper y-limit of 1.4 suits the normalised (density) bar heights.
ax.set_ylim(0,1.4)
ax2.set_xlim(7.0,8.0)
def func(x, pos):
    """Format a tick value with a decimal comma instead of a decimal point.

    The (value, position) signature is the one expected by
    ``matplotlib.ticker.FuncFormatter``, which this function is wrapped in.

    Args:
        x: Tick value; it is converted with ``str``.
        pos: Tick position; unused.

    Returns:
        ``str(x)`` with its first ``'.'`` replaced by ``','``. If the text
        contains no dot (e.g. an integer tick, or ``1e-05``) it is returned
        unchanged instead of raising ``ValueError``.
    """
    text = str(x)
    whole, dot, fraction = text.partition('.')
    if not dot:
        # Nothing to replace; the original s.index('.') raised here.
        return text
    return whole + ',' + fraction
# Use the decimal-comma formatter on the x axis of both panels.
x_format = tkr.FuncFormatter(func)
ax.xaxis.set_major_formatter(x_format)
ax2.xaxis.set_major_formatter(x_format)
# hide the spines between ax and ax2
ax.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)
d = .015 # how big to make the diagonal lines in axes coordinates
# arguments to pass plot, just so we don't keep repeating them
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
# Break marks at the right edge of the left panel (bottom, then top).
ax.plot((1-d/3,1+d/3), (-d,+d), **kwargs)
ax.plot((1-d/3,1+d/3),(1-d,1+d), **kwargs)
kwargs.update(transform=ax2.transAxes) # switch to the right-hand axes
# Break marks at the left edge of the right panel (top, then bottom).
ax2.plot((-d,+d), (1-d,1+d), **kwargs)
ax2.plot((-d,+d), (-d,+d), **kwargs)
ax2.tick_params(labelright=False)
ax.tick_params(labeltop=False)
# Extra padding between the x axis and its tick labels.
ax.tick_params(axis='x', which='major', pad=15)
ax2.tick_params(axis='x', which='major', pad=15)
ax2.set_yticks([])
# Shared axis titles placed in figure coordinates, slightly outside [0, 1].
# NOTE(review): '\m' is not a recognised escape in a non-raw string literal;
# Python keeps the backslash but warns about it -- consider a raw string.
f.text(0.5, -0.04, 'Tamanho lateral do triângulo ($\mu m$)', ha='center', fontsize=22)
f.text(-0.02, 0.5, 'Frequência relativa', va='center', rotation='vertical', fontsize=22)
#ax.set_xlabel('Tamanho lateral do triângulo ($\mu m$)', fontsize=22)
#ax.set_ylabel('Frequência relativa', fontsize=22)
#x_axis = np.arange(0, 29, 0.001)
#ax.plot(x_axis, norm.pdf(x_axis,2.232,1.888), linewidth=3)
# Vertical guide lines: solid at 0.903, dashed at 0.190 and 1.616
# (presumably the mean and mean -/+ one standard deviation, matching the
# annotation text below -- confirm).
ax.axvline(0.903, color='k', linestyle='-', linewidth=1.3)
ax.axvline(0.190, color='k', linestyle='--', linewidth=1)
ax.axvline(1.616, color='k', linestyle='--', linewidth=1)
# Figure-level legend; loc=9 is matplotlib's numeric code for 'upper center'.
f.legend(loc=9,
bbox_to_anchor=(.79,.99),
labelspacing=1.5,
numpoints=1,
columnspacing=0.2,
ncol=1, fontsize=18)
# Annotation in data coordinates: 70 % of 0.903 in x and 92 % of 1.4 in y.
ax.text(0.903*0.70, 1.4*0.92, '$\mu$ = (0,90 $\pm$ 0,71) $\mu m$', fontsize=20)
f.tight_layout()
plt.show()
型
的
1条答案
按热度按时间mkh04yzy1#
你想同时做出漂亮的图表和完成拟合;我来帮你解决拟合,图表是次要的问题。
首先,用 NumPy 数组存放数据会方便很多。其次,你的直方图没有归一化(把它当作概率密度时,积分不等于 1)。
所以如果在第一个程序中,我会规范化freqs数组
字符串
并将Y限制设置为1.4,我会得到下面的图表
(此处原为图片)
然后,在拟合部分,我做了类似的变换,并把 X 坐标平移半个步长,使直方图的值位于各个 bin 的中点,这样拟合就能正常工作了。代码如下(Python 3.9.1,Windows 10 x64)。我删掉了所有与拟合无关的内容,只保留拟合本身,并把拟合函数与输入数据画在一起。
我也不太明白归一化积分的部分(直方图中所有条形的总和为1,因为它是相对频率),我不明白步长和移位的选择。你能更好地解释这一部分吗?
你要拟合的函数是对数正态分布的双参数 PDF,它满足归一化条件 $\int_0^{\infty} \mathrm{PDF}(x;\mu,\sigma)\,dx = 1$。你必须用同样的方式处理输入数据。对于直方图,积分就是各个 bin 的值乘以步长后求和。步长显然是 0.1,所以我先计算这个和,确认它不等于 1,然后把各频率除以这个归一化值,使积分等于 1。你也可以不拟合双参数曲线,而拟合三参数曲线,把归一化值作为第三个参数;但非线性拟合中参数越多,越容易出问题。
关于平移:我们必须对 bin 的值代表什么做一个假设。我假设 bin 的值对应 bin 中点处的值。同样,这只是一个假设,我不知道你的数据是如何产生的,也许直方图的值实际上对应 bin 左边界处的值。如果是这样,你只需去掉平移并重新运行代码。
型
我得到了对数正态曲线,它看起来与输入数据一致。两条曲线的大部分数据都落在 [0, 2] 区间内,峰值在 ~(0.8, 1.2) 附近。下面是最简单的图,它把拟合曲线(蓝色)叠加在频率直方图各 bin 的中心点(红点)上。现在你可以尝试把它放进你那张精美的图里,祝你好运。
仅供参考,下面的代码对未归一化的数据拟合三参数对数正态曲线,看起来效果同样不错。
型
的