scipy: why doesn't my linear least squares fit the data points correctly?

cbwuti44 · asked 12 months ago in Other

With the following code I get a fit that does not match the 2D plane well:

import numpy as np
from scipy import linalg as la
from scipy.linalg import solve

# data
f1 = np.array([1., 1.5, 3.5, 4.])
f2 = np.array([3., 4., 7., 7.25])
# z = np.array([6., 6.5, 8., 9.])
A = X = np.array([f1, f2]).T

b = y = np.array([0.5, 1., 1.5, 2.]).T

##################### la.lstsq

res= la.lstsq(A,b)[0]
print(res)    
##################### custom lu

#custom OLS 
def ord_ls(X, y):
    A = X.T @ X
    b = X.T @ y
    beta = solve(A, b, overwrite_a=True, overwrite_b=True,
                 check_finite=True)
    return beta

res = ord_ls(X, y)
print(res)

##################### plot

# use the optimized parameters to plot the fitted curve in 3D space.
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Create 3D plot of the data points and the fitted curve
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(f1, f2, y, color='blue')
x_range = np.linspace(0, 7, 100)
y_range = np.linspace(0, 7,100)

X, Y = np.meshgrid(x_range, y_range)
Z = res[0]*X + res[1]

ax.plot_surface(X, Y, Z, color='red', alpha=0.5)
ax.set_xlabel('feat.1')
ax.set_ylabel('feat.2')
ax.set_zlabel('target')
plt.show()

# [0.2961165  0.09475728]
# [0.2961165  0.09475728]

Although the coefficients appear to be identical, the plot is still distorted. Is there any explanation or correction? Or is some regularization of the least squares needed? Or are the two features collinear, and is that the reason? (I'm not very familiar with linalg yet.)
P.S. See the scipy-0.18.0 docs (p. 184) regarding the LU decomposition p, l, u = la.lu(A).
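
A quick way to check the collinearity suspicion, using nothing beyond the data already defined above, is to look at the correlation between f1 and f2 and at the condition number of the design matrix (an illustrative sketch, not from the original post):

import numpy as np

f1 = np.array([1., 1.5, 3.5, 4.])
f2 = np.array([3., 4., 7., 7.25])
X = np.array([f1, f2]).T

# Pearson correlation of the two features; a value close to 1 means near-collinearity
print(np.corrcoef(f1, f2)[0, 1])
# condition number of the design matrix; a large value means the
# least-squares problem is sensitive to small perturbations
print(np.linalg.cond(X))

For these four points the correlation is very close to 1, so the features are indeed strongly correlated; the answer below, however, traces the distorted plot to the missing intercept column rather than to collinearity.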

wj8zmpe1 #1

Retitling the topic: LS fit of x, y (multiple linear regression) with an intercept column (reposted from my comment).
I think this happens because the sum of squared errors (the residuals) of the linear regression is only minimized properly when the intercept is taken into account, so the SSE-minimizing equation that includes an intercept is what OLS actually fits. Starting from the normal equation intercept, *coef = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y), we use the equation (X.T.dot(X)).dot(beta) = (X.T).dot(y), which minimizes the L2-norm of (X beta - y) (or, in the single-feature linear case, (a x - b)).
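Spelled out (the same relation as the code-style notation above, with the column of ones already included in X):

$$\hat\beta \;=\; \arg\min_{\beta}\,\lVert X\beta - y\rVert_2^2 \quad\Longleftrightarrow\quad (X^\top X)\,\hat\beta = X^\top y$$

Every variant in the code below (lstsq, an LU factorization, solve on the normal equations, and the explicit inverse) computes this same beta-hat: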

import numpy as np
from scipy import linalg as la
from scipy.linalg import solve

# data
f1 = np.array([1., 1.5, 3.5, 4.])
f2 = np.array([3., 4., 7., 7.25])
# z = np.array([6., 6.5, 8., 9.])
X = np.array([f1, f2]).T
y = np.array([0.5, 1., 1.5, 2.]).T
# adds x0 = 1 to each instance
X = np.column_stack((np.ones(len(y)),X))    # CORRECTION

##################### la.lstsq

x= la.lstsq(X,y)[0]
print("lstsq:", x)

##################### LU-decomposition
from scipy.linalg import lu
p, l, u = lu(X)
# reconstruct
print(p @ l @ u)
print(np.allclose(X - p @ l @ u, np.zeros((4, 3))))

#####################
from scipy.linalg import lu_factor, lu_solve

lu_piv = lu_factor(X.T @ X)  # lu_factor needs a square matrix, so factor the normal-equations matrix (otherwise: ValueError: expected square matrix)
x = lu_solve(lu_piv, X.T @ y)
print("lu_solve: ", x)

##################### custom ls

# LR: y= ax-b, MLR: beta= (X.T @ X)^(-1)(X.T @ y)
A = X.T @ X # form the normal-equations (Gram) matrix so that solve gets a square system
b = X.T @ y
beta = solve(A, b, overwrite_a=True, overwrite_b=True,
                 check_finite=True)
print("solve: ", beta)

############
# calculate normal equation (NOT STABLE)
theta_best = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
# best values for theta
intercept, *coef = theta_best
print(f"Intercept: {intercept}\n\
Coefficients: {coef}")

##################### plot

# use the optimized parameters to plot the fitted curve in 3D space.
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Create 3D plot of the data points and the fitted curve
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(f1, f2, y, color='blue')
x_range = np.linspace(0, 7, 100)
y_range = np.linspace(0, 7,100)

X, Y = np.meshgrid(x_range, y_range)
Z = beta[0] + beta[1]*X + beta[2]*Y

ax.plot_surface(X, Y, Z, color='red', alpha=0.5)
ax.set_xlabel('feat.1')
ax.set_ylabel('feat.2')
ax.set_zlabel('target')
plt.show()
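
As a quick sanity check (not part of the original answer), the fitted values can be compared with the targets. Note that the name X is overwritten by np.meshgrid in the plotting section, so the design matrix has to be rebuilt first:

import numpy as np
from scipy import linalg as la

f1 = np.array([1., 1.5, 3.5, 4.])
f2 = np.array([3., 4., 7., 7.25])
y = np.array([0.5, 1., 1.5, 2.])
Xd = np.column_stack((np.ones(len(y)), f1, f2))  # design matrix with intercept column

beta = la.lstsq(Xd, y)[0]    # same coefficients as the solvers above
y_hat = Xd @ beta            # fitted values
print("fitted:   ", y_hat)
print("residuals:", y - y_hat)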

N.B.
Do not solve systems of linear equations by inverting the matrix. Production-quality algorithms do not compute the matrix inverse: it is slower and introduces unnecessary error. For small systems this is not a disaster, but why do something suboptimal? Basically, any time you see the math written as x = A^-1 * b, what you actually want is x = np.linalg.solve(A, b).
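
A small illustration of that point (a sketch with scipy.linalg.hilbert as a deliberately ill-conditioned matrix, not taken from the post): a dedicated solver typically leaves a much smaller residual than multiplying by an explicitly computed inverse.

import numpy as np
from scipy.linalg import hilbert, solve

A = hilbert(12)                  # notoriously ill-conditioned 12x12 matrix
x_true = np.ones(12)
b = A @ x_true

x_solve = solve(A, b)            # LU-based linear solver
x_inv = np.linalg.inv(A) @ b     # explicit inverse (the discouraged route)

print("residual with solve:", np.linalg.norm(A @ x_solve - b))
print("residual with inv:  ", np.linalg.norm(A @ x_inv - b))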
