scipy: why doesn't my linear least squares fit the data points correctly?

cbwuti44 · asked 12 months ago in Other

With the following code I get a fit that does not match the 2D plane well:

import numpy as np
from scipy import linalg as la
from scipy.linalg import solve

# data
f1 = np.array([1., 1.5, 3.5, 4.])
f2 = np.array([3., 4., 7., 7.25])
# z = np.array([6., 6.5, 8., 9.])
A = X = np.array([f1, f2]).T

b = y = np.array([0.5, 1., 1.5, 2.]).T

##################### la.lstsq

res= la.lstsq(A,b)[0]
print(res)    
##################### custom lu

#custom OLS 
def ord_ls(X, y):
    A = X.T @ X
    b = X.T @ y
    beta = solve(A, b, overwrite_a=True, overwrite_b=True,
                 check_finite=True)
    return beta

res = ord_ls(X, y)
print(res)

##################### plot

# use the optimized parameters to plot the fitted curve in 3D space.
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Create 3D plot of the data points and the fitted curve
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(f1, f2, y, color='blue')
x_range = np.linspace(0, 7, 100)
y_range = np.linspace(0, 7,100)

X, Y = np.meshgrid(x_range, y_range)
Z = res[0]*X + res[1]

ax.plot_surface(X, Y, Z, color='red', alpha=0.5)
ax.set_xlabel('feat.1')
ax.set_ylabel('feat.2')
ax.set_zlabel('target')
plt.show()

# [0.2961165  0.09475728]
# [0.2961165  0.09475728]

Although the coefficients appear to be identical, the plot is still distorted. Is there any explanation or correction? Or is some regularization of the least squares needed? Or are the two features collinear, and is that the reason? (I'm not very familiar with linalg yet.)
P.S. See the scipy-0.18.0 docs (p. 184) regarding the LU decomposition p, l, u = la.lu(A).
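
A quick way to check the collinearity suspicion, using nothing beyond the data already defined above, is to look at the correlation between f1 and f2 and at the condition number of the design matrix (an illustrative sketch, not from the original post):

import numpy as np

f1 = np.array([1., 1.5, 3.5, 4.])
f2 = np.array([3., 4., 7., 7.25])
X = np.array([f1, f2]).T

# Pearson correlation of the two features; a value close to 1 means near-collinearity
print(np.corrcoef(f1, f2)[0, 1])
# condition number of the design matrix; a large value means the
# least-squares problem is sensitive to small perturbations
print(np.linalg.cond(X))

For these four points the correlation is very close to 1, so the features are indeed strongly correlated; the answer below, however, traces the distorted plot to the missing intercept column rather than to collinearity.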

wj8zmpe1 #1

Retitling the topic: LS fit of x, y (multiple linear regression) with an intercept column (reposted from my comment).
I think this happens because the sum of squared errors (the residuals) of the linear regression is only minimized properly when the intercept is taken into account, so the SSE-minimizing equation that includes an intercept is what OLS actually fits. Starting from the normal equation intercept, *coef = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y), we use the equation (X.T.dot(X)).dot(beta) = (X.T).dot(y), which minimizes the L2-norm of (X beta - y) (or, in the single-feature linear case, (a x - b)).
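Spelled out (the same relation as the code-style notation above, with the column of ones already included in X):

$$\hat\beta \;=\; \arg\min_{\beta}\,\lVert X\beta - y\rVert_2^2 \quad\Longleftrightarrow\quad (X^\top X)\,\hat\beta = X^\top y$$

Every variant in the code below (lstsq, an LU factorization, solve on the normal equations, and the explicit inverse) computes this same beta-hat: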

import numpy as np
from scipy import linalg as la
from scipy.linalg import solve

# data
f1 = np.array([1., 1.5, 3.5, 4.])
f2 = np.array([3., 4., 7., 7.25])
# z = np.array([6., 6.5, 8., 9.])
X = np.array([f1, f2]).T
y = np.array([0.5, 1., 1.5, 2.]).T
# adds x0 = 1 to each instance
X = np.column_stack((np.ones(len(y)),X))    # CORRECTION

##################### la.lstsq

x= la.lstsq(X,y)[0]
print("lstsq:", x)

##################### LU-decomposition
from scipy.linalg import lu
p, l, u = lu(X)
# reconstruct
print(p @ l @ u)
print(np.allclose(X - p @ l @ u, np.zeros((4, 3))))

#####################
from scipy.linalg import lu_factor, lu_solve

lu_piv = lu_factor(X.T @ X)  # lu_factor needs a square matrix, so factor the normal-equations matrix (otherwise: ValueError: expected square matrix)
x = lu_solve(lu_piv, X.T @ y)
print("lu_solve: ", x)

##################### custom ls

# LR: y= ax-b, MLR: beta= (X.T @ X)^(-1)(X.T @ y)
A = X.T @ X # form the normal-equations (Gram) matrix so that solve gets a square system
b = X.T @ y
beta = solve(A, b, overwrite_a=True, overwrite_b=True,
                 check_finite=True)
print("solve: ", beta)

############
# calculate normal equation (NOT STABLE)
theta_best = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
# best values for theta
intercept, *coef = theta_best
print(f"Intercept: {intercept}\n\
Coefficients: {coef}")

##################### plot

# use the optimized parameters to plot the fitted curve in 3D space.
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Create 3D plot of the data points and the fitted curve
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(f1, f2, y, color='blue')
x_range = np.linspace(0, 7, 100)
y_range = np.linspace(0, 7,100)

X, Y = np.meshgrid(x_range, y_range)
Z = beta[0] + beta[1]*X + beta[2]*Y

ax.plot_surface(X, Y, Z, color='red', alpha=0.5)
ax.set_xlabel('feat.1')
ax.set_ylabel('feat.2')
ax.set_zlabel('target')
plt.show()
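
As a quick sanity check (not part of the original answer), the fitted values can be compared with the targets. Note that the name X is overwritten by np.meshgrid in the plotting section, so the design matrix has to be rebuilt first:

import numpy as np
from scipy import linalg as la

f1 = np.array([1., 1.5, 3.5, 4.])
f2 = np.array([3., 4., 7., 7.25])
y = np.array([0.5, 1., 1.5, 2.])
Xd = np.column_stack((np.ones(len(y)), f1, f2))  # design matrix with intercept column

beta = la.lstsq(Xd, y)[0]    # same coefficients as the solvers above
y_hat = Xd @ beta            # fitted values
print("fitted:   ", y_hat)
print("residuals:", y - y_hat)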

N.B.
Do not solve systems of linear equations by inverting the matrix. Production-quality algorithms do not compute the matrix inverse: it is slower and introduces unnecessary error. For small systems this is not a disaster, but why do something suboptimal? Basically, any time you see the math written as x = A^-1 * b, what you actually want is x = np.linalg.solve(A, b).
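
A small illustration of that point (a sketch with scipy.linalg.hilbert as a deliberately ill-conditioned matrix, not taken from the post): a dedicated solver typically leaves a much smaller residual than multiplying by an explicitly computed inverse.

import numpy as np
from scipy.linalg import hilbert, solve

A = hilbert(12)                  # notoriously ill-conditioned 12x12 matrix
x_true = np.ones(12)
b = A @ x_true

x_solve = solve(A, b)            # LU-based linear solver
x_inv = np.linalg.inv(A) @ b     # explicit inverse (the discouraged route)

print("residual with solve:", np.linalg.norm(A @ x_solve - b))
print("residual with inv:  ", np.linalg.norm(A @ x_inv - b))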
