我试图用Scipy找到一条曲线的方程,但我得到了一个错误。我很确定正确的方程是1-(exp(-ax+b)/(1+exp(-ax+b))。这是为了表示一个logit函数。
x轴和y轴的数字实际上是来自一个logit函数,我正试图反求系数(a和b)。
我得到的错误表明这是不可行的?
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings
# extracted from plot
xData = numpy.array([-1500,
-992.407809110628,
-507.59219088937,
-449.023861171366,
-377.440347071583,
-335.140997830802,
-299.349240780911,
-263.557483731019,
-218.004338394793,
-178.958785249457,
-152.9284164859,
-123.644251626898,
-110.629067245119,
-91.1062906724512,
-74.8373101952279,
-61.822125813449,
-55.3145336225593,
-22.7765726681127,
0,
16.2689804772235,
39.0455531453362,
58.5683297180039,
74.8373101952274,
100.867678958785,
123.644251626898,
139.913232104121,
159.436008676789,
182.212581344902,
191.973969631236,
221.258134490238,
237.527114967462,
247.288503253796,
276.572668112798,
299.349240780911,
315.618221258134,
325.379609544468,
344.902386117136,
357.917570498915,
380.694143167027,
406.724511930585,
432.754880694143,
475.054229934924,
510.845986984815,
553.145336225596,
601.952277657267,
644.251626898047,
702.819956616052,
761.388286334056,
823.210412147505,
888.286334056399,
979.39262472885,
1037.96095444685,
1086.76789587852,
1138.82863340563,
1210.41214750542,
1252.7114967462,
1301.51843817787,
1363.34056399132])
yData = numpy.array([0.00160513643659698,
0.00372906968241948,
-0.00372906968241992,
-0.00706468943569538,
-0.00240248186822578,
0.00071726270268746,
0.00545607114131807,
0.00858974314335125,
0.0181230697450929,
0.0292754602145519,
0.0452711148560425,
0.0564443964721814,
0.0692576331027179,
0.0852672151753288,
0.102888897400096,
0.11730727046723,
0.131739570965484,
0.160562389668631,
0.197431781701444,
0.234315101165377,
0.28242044825437,
0.330532759058923,
0.388282852198618,
0.436381235572051,
0.500537947027015,
0.551867494420322,
0.61924144246404,
0.660926243806645,
0.710664582194475,
0.760361138288945,
0.795639321316281,
0.834141704647931,
0.858156077756847,
0.891815196916466,
0.90622660626804,
0.923862215923928,
0.93345125225015,
0.943054216007492,
0.955846561491349,
0.963816533949854,
0.974996779281553,
0.982931933162257,
0.990881014474082,
0.995605895481593,
0.997106576184788,
1.00183145719229,
1.00170611031221,
1.00158076343213,
1.00305358927309,
1.00291431496189,
1.007534740236,
1.00580425691932,
1.00730493762251,
1.00879865461015,
1.00864545286783,
1.00534465169235,
1.00684533239555,
1.0115284311097])
def func(x, a, b): # from the zunzun.com "function finder"
return (1-((numpy.exp(-a*x+b))/(1+numpy.exp(-a*x+b))))
# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
val = func(xData, *parameterTuple)
return numpy.sum((yData - val)**2.0)
def generate_Initial_Parameters():
# min and max used for bounds
maxX = max(xData)
minX = min(xData)
maxY = max(yData)
minY = min(yData)
minData = min(minX, minY)
maxData = max(maxX, maxY)
parameterBounds = []
parameterBounds.append([minData, maxData]) # search bounds for a
parameterBounds.append([minData, maxData]) # search bounds for b
#parameterBounds.append([minData, maxData]) # search bounds for Offset
# "seed" the numpy random number generator for repeatable results
result = differential_evolution(sumOfSquaredError, parameterBounds, seed=2)
print(parameterBounds)
return result.x
# by default, differential_evolution completes by calling curve_fit() using parameter bounds
geneticParameters = generate_Initial_Parameters()
print(geneticParameters)
# now call curve_fit without passing bounds from the genetic algorithm,
# just in case the best fit parameters are aoutside those bounds
fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters, maxfev=5000000)
print('Fitted parameters:', f' a: {fittedParameters[0]}', f' b: {fittedParameters[1]}')
print()
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print()
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(xData, yData, 'D')
# create data for the fitted equation plot
xModel = numpy.linspace(min(xData), max(xData))
yModel = func(xModel, *fittedParameters)
# now the model as a line plot
axes.plot(xModel, yModel)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
plt.show()
plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
1条答案
按热度按时间sbtkgmzw1#
您的代码实际上几乎可以正常工作了。经过两处更改后,它可以完美地工作:
1.我将逻辑函数的定义改为Lambda(a*x+b),其中Lambda(y)= 1/(1+np.exp(-y)),这样在保留解释的同时需要较少的计算(一些代数表明,这两个定义在从b到-b的变化中是相似的)。
1.您仍然面临着一个非线性优化问题。因此,初始值非常重要。我刚刚从curve_fit()中删除了输入“geneticParameters”,从而强制该算法使用默认的初始参数开始。结果拟合看起来不错,RMSE为0.0066,R平方为0.9998。作为比较:根据拟合的最佳参数是a:0.0115和B:-1.3748,而您的geneticParameters分别选择了10.23和-1431.41。因此,geneticParameters的参数猜测值相差甚远,这就解释了收敛问题。
修改后的代码可在下面找到。