scipy 用Python脚本求曲线方程

gkl3eglg  于 2022-11-10  发布在  Python
关注(0)|答案(1)|浏览(149)

我试图用Scipy找到一条曲线的方程,但我得到了一个错误。我很确定正确的方程是1-(exp(-ax+b)/(1+exp(-ax+b))。这是为了表示一个logit函数。
x轴和y轴的数字实际上是来自一个logit函数,我正试图反求系数(a和b)。
我得到的错误表明这是不可行的?

import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings

# extracted from plot

xData = numpy.array([-1500,
-992.407809110628,
-507.59219088937,
-449.023861171366,
-377.440347071583,
-335.140997830802,
-299.349240780911,
-263.557483731019,
-218.004338394793,
-178.958785249457,
-152.9284164859,
-123.644251626898,
-110.629067245119,
-91.1062906724512,
-74.8373101952279,
-61.822125813449,
-55.3145336225593,
-22.7765726681127,
0,
16.2689804772235,
39.0455531453362,
58.5683297180039,
74.8373101952274,
100.867678958785,
123.644251626898,
139.913232104121,
159.436008676789,
182.212581344902,
191.973969631236,
221.258134490238,
237.527114967462,
247.288503253796,
276.572668112798,
299.349240780911,
315.618221258134,
325.379609544468,
344.902386117136,
357.917570498915,
380.694143167027,
406.724511930585,
432.754880694143,
475.054229934924,
510.845986984815,
553.145336225596,
601.952277657267,
644.251626898047,
702.819956616052,
761.388286334056,
823.210412147505,
888.286334056399,
979.39262472885,
1037.96095444685,
1086.76789587852,
1138.82863340563,
1210.41214750542,
1252.7114967462,
1301.51843817787,
1363.34056399132])
yData = numpy.array([0.00160513643659698,
0.00372906968241948,
-0.00372906968241992,
-0.00706468943569538,
-0.00240248186822578,
0.00071726270268746,
0.00545607114131807,
0.00858974314335125,
0.0181230697450929,
0.0292754602145519,
0.0452711148560425,
0.0564443964721814,
0.0692576331027179,
0.0852672151753288,
0.102888897400096,
0.11730727046723,
0.131739570965484,
0.160562389668631,
0.197431781701444,
0.234315101165377,
0.28242044825437,
0.330532759058923,
0.388282852198618,
0.436381235572051,
0.500537947027015,
0.551867494420322,
0.61924144246404,
0.660926243806645,
0.710664582194475,
0.760361138288945,
0.795639321316281,
0.834141704647931,
0.858156077756847,
0.891815196916466,
0.90622660626804,
0.923862215923928,
0.93345125225015,
0.943054216007492,
0.955846561491349,
0.963816533949854,
0.974996779281553,
0.982931933162257,
0.990881014474082,
0.995605895481593,
0.997106576184788,
1.00183145719229,
1.00170611031221,
1.00158076343213,
1.00305358927309,
1.00291431496189,
1.007534740236,
1.00580425691932,
1.00730493762251,
1.00879865461015,
1.00864545286783,
1.00534465169235,
1.00684533239555,
1.0115284311097])

def func(x, a, b): # from the zunzun.com "function finder"
    return (1-((numpy.exp(-a*x+b))/(1+numpy.exp(-a*x+b))))

# function for genetic algorithm to minimize (sum of squared error)

def sumOfSquaredError(parameterTuple):
    warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
    val = func(xData, *parameterTuple)
    return numpy.sum((yData - val)**2.0)

def generate_Initial_Parameters():
    # min and max used for bounds
    maxX = max(xData)
    minX = min(xData)
    maxY = max(yData)
    minY = min(yData)

    minData = min(minX, minY)
    maxData = max(maxX, maxY)

    parameterBounds = []
    parameterBounds.append([minData, maxData]) # search bounds for a
    parameterBounds.append([minData, maxData]) # search bounds for b
    #parameterBounds.append([minData, maxData]) # search bounds for Offset

    # "seed" the numpy random number generator for repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=2)
    print(parameterBounds)
    return result.x

# by default, differential_evolution completes by calling curve_fit() using parameter bounds

geneticParameters = generate_Initial_Parameters()
print(geneticParameters)

# now call curve_fit without passing bounds from the genetic algorithm,

# just in case the best fit parameters are aoutside those bounds

fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters, maxfev=5000000)
print('Fitted parameters:', f' a: {fittedParameters[0]}', f' b: {fittedParameters[1]}')
print()

modelPredictions = func(xData, *fittedParameters) 

absError = modelPredictions - yData

SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))

print()
print('RMSE:', RMSE)
print('R-squared:', Rsquared)

print()

########################################################## 

# graphics output section

def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData,  'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot

graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
sbtkgmzw

sbtkgmzw1#

您的代码实际上几乎可以正常工作了。经过两处更改后,它可以完美地工作:
1.我将逻辑函数的定义改为Lambda(a*x+b),其中Lambda(y)= 1/(1+np.exp(-y)),这样在保留解释的同时需要较少的计算(一些代数表明,这两个定义在从b到-b的变化中是相似的)。
1.您仍然面临着一个非线性优化问题。因此,初始值非常重要。我刚刚从curve_fit()中删除了输入“geneticParameters”,从而强制该算法使用默认的初始参数开始。结果拟合看起来不错,RMSE为0.0066,R平方为0.9998。作为比较:根据拟合的最佳参数是a:0.0115和B:-1.3748,而您的geneticParameters分别选择了10.23和-1431.41。因此,geneticParameters的参数猜测值相差甚远,这就解释了收敛问题。
修改后的代码可在下面找到。

import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings

# extracted from plot

xData = numpy.array([-1500,
-992.407809110628,
-507.59219088937,
-449.023861171366,
-377.440347071583,
-335.140997830802,
-299.349240780911,
-263.557483731019,
-218.004338394793,
-178.958785249457,
-152.9284164859,
-123.644251626898,
-110.629067245119,
-91.1062906724512,
-74.8373101952279,
-61.822125813449,
-55.3145336225593,
-22.7765726681127,
0,
16.2689804772235,
39.0455531453362,
58.5683297180039,
74.8373101952274,
100.867678958785,
123.644251626898,
139.913232104121,
159.436008676789,
182.212581344902,
191.973969631236,
221.258134490238,
237.527114967462,
247.288503253796,
276.572668112798,
299.349240780911,
315.618221258134,
325.379609544468,
344.902386117136,
357.917570498915,
380.694143167027,
406.724511930585,
432.754880694143,
475.054229934924,
510.845986984815,
553.145336225596,
601.952277657267,
644.251626898047,
702.819956616052,
761.388286334056,
823.210412147505,
888.286334056399,
979.39262472885,
1037.96095444685,
1086.76789587852,
1138.82863340563,
1210.41214750542,
1252.7114967462,
1301.51843817787,
1363.34056399132])
yData = numpy.array([0.00160513643659698,
0.00372906968241948,
-0.00372906968241992,
-0.00706468943569538,
-0.00240248186822578,
0.00071726270268746,
0.00545607114131807,
0.00858974314335125,
0.0181230697450929,
0.0292754602145519,
0.0452711148560425,
0.0564443964721814,
0.0692576331027179,
0.0852672151753288,
0.102888897400096,
0.11730727046723,
0.131739570965484,
0.160562389668631,
0.197431781701444,
0.234315101165377,
0.28242044825437,
0.330532759058923,
0.388282852198618,
0.436381235572051,
0.500537947027015,
0.551867494420322,
0.61924144246404,
0.660926243806645,
0.710664582194475,
0.760361138288945,
0.795639321316281,
0.834141704647931,
0.858156077756847,
0.891815196916466,
0.90622660626804,
0.923862215923928,
0.93345125225015,
0.943054216007492,
0.955846561491349,
0.963816533949854,
0.974996779281553,
0.982931933162257,
0.990881014474082,
0.995605895481593,
0.997106576184788,
1.00183145719229,
1.00170611031221,
1.00158076343213,
1.00305358927309,
1.00291431496189,
1.007534740236,
1.00580425691932,
1.00730493762251,
1.00879865461015,
1.00864545286783,
1.00534465169235,
1.00684533239555,
1.0115284311097])

def func(x, a, b): # from the zunzun.com "function finder"
    #return (1-((numpy.exp(-a*x+b))/(1+numpy.exp(-a*x+b))))
    # => Rephrase logit function
    return (1/(1+numpy.exp(- (a*x+b) )))

# function for genetic algorithm to minimize (sum of squared error)

def sumOfSquaredError(parameterTuple):
    warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
    val = func(xData, *parameterTuple)
    return numpy.sum((yData - val)**2.0)

def generate_Initial_Parameters():
    # min and max used for bounds
    maxX = max(xData)
    minX = min(xData)
    maxY = max(yData)
    minY = min(yData)

    minData = min(minX, minY)
    maxData = max(maxX, maxY)

    parameterBounds = []
    parameterBounds.append([minData, maxData]) # search bounds for a
    parameterBounds.append([minData, maxData]) # search bounds for b
    #parameterBounds.append([minData, maxData]) # search bounds for Offset

    # "seed" the numpy random number generator for repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=2)
    print(parameterBounds)
    return result.x

# by default, differential_evolution completes by calling curve_fit() using parameter bounds

geneticParameters = generate_Initial_Parameters()
print(geneticParameters)

# now call curve_fit without passing bounds from the genetic algorithm,

# just in case the best fit parameters are aoutside those bounds

fittedParameters, pcov = curve_fit(func, xData, yData, maxfev=5000000)
print('Fitted parameters:', f' a: {fittedParameters[0]}', f' b: {fittedParameters[1]}')
print()

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData

SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))

print()
print('RMSE:', RMSE)
print('R-squared:', Rsquared)

print()

########################################################## 

# graphics output section

def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData,  'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)
    print(yModel)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot

graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)

相关问题