numpy 我有错误类型错误:* 不支持的操作数类型:“float”和“NoneType”尝试预测y_pred逻辑回归

mzsu5hc0  于 2023-01-17  发布在  其他
关注(0)|答案(3)|浏览(206)

我有一个关于糖尿病患者的数据集,有很多示例。
每个示例都用特定的类(二进制,0或1)进行分类(标记)
我在尝试预测Y ^时遇到了问题
但我有问题

  1. import numpy as np
  2. import pandas as pd
  3. from sklearn.model_selection import train_test_split
  4. def load_cvs(filename):
  5. data = []
  6. labels = []
  7. with open(filename, 'r') as f:
  8. for line in f:
  9. items = line.split(",")
  10. data.append([float(items[0]),float(items[1]),float(items[2]),float(items[3]),float(items[4]),float(items[5])])
  11. labels.append(int(items[6]))
  12. return np.array(data), np.array(labels)
  13. X,y = load_cvs('diabetes.csv')
  14. df = pd.read_csv("diabetes.csv")
  15. #Glucose;BloodPressure;SkinThickness;Insulin;BMI;Age
  16. X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2, random_state=33)
  17. def sigmoid(z):
  18. return 1 / (1 + np.exp(-z))
  19. def predict(X,w):
  20. z = np.dot(X,w)
  21. return sigmoid(z)
  22. def cost(y , y_pred):
  23. return -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))
  24. def train(X, y, epochs = 1000, lr =0.02):
  25. X = np.c_[np.ones((X.shape[0], 1)), X]
  26. w = np.random.randn(X.shape[1])
  27. for epoch in range(epochs):
  28. y_pred = predict(X,w)
  29. error = y_pred - y
  30. gradient = np.dot(X.T, error) / y.size
  31. w -= lr * gradient
  32. if epoch % 100 == 0:
  33. c = cost(y,y_pred)
  34. print(f'Epoch{epoch}: cost = {c}')
  35. return w
  36. w = train(X_train,y_train,epochs=1000, lr=0.02)
  37. X_test=X_test
  38. y_pred = predict(np.c_[np.ones((X_test.shape[0], 1)), X_test], w)

我收到此错误消息:TypeError: unsupported operand type(s) for *: 'float' and 'NoneType'
我更改了代码,但仍有问题

  1. import numpy as np
  2. from random import seed
  3. from random import randrange
  4. from sklearn.model_selection import train_test_split
  5. from csv import reader
  6. import pandas as pd
  7. from sklearn.linear_model import LogisticRegression
  8. from sklearn import preprocessing
  9. from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
  10. from matplotlib import pyplot as plt
  11. import seaborn as sns
  12. def load_cvs(filename):
  13. data = []
  14. labels = []
  15. with open(filename, 'r') as f:
  16. for line in f:
  17. items = line.split(",")
  18. data.append([float(items[0]),float(items[1]),float(items[2]),float(items[3]),float(items[4]),float(items[5])])
  19. labels.append(int(items[6]))
  20. return np.array(data), np.array(labels)
  21. seed(1)
  22. col_names = ['Glucose','BloodPressure','SkinThickness','Insulin','BMI','Age','Outcome']
  23. df = pd.read_csv('diabetes.csv', header=None, names=col_names)
  24. nandf = df.fillna(value=0)
  25. #nandf = df[df.isna().any(axis=1)]
  26. #nandf.head()
  27. feature_cols = ['Glucose','BloodPressure','SkinThickness','Insulin','BMI','Age']
  28. X = nandf[feature_cols] # Features
  29. y = nandf.Outcome # Target variable
  30. X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2, random_state=16)
  31. def sigmoid(z):
  32. return 1 / (1 + np.exp(-z))
  33. def predict(X,w):
  34. z = np.dot(X,w)
  35. return sigmoid(z)
  36. def cost(y , y_pred):
  37. return -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))
  38. def train(X, y, epochs = 1000, lr =0.02):
  39. X = np.c_[np.ones((X.shape[0], 1)), X]
  40. w = np.random.randn(X.shape[1])
  41. for epoch in range(epochs):
  42. y_pred = predict(X,w)
  43. error = y_pred - y
  44. gradient = np.dot(X.T, error) / y.size
  45. w -= lr * gradient
  46. if epoch % 100 == 0:
  47. c = cost(y,y_pred)
  48. print(f'Epoch{epoch}: cost = {c}')
  49. return w
  50. w = train(X_train,y_train,epochs=1000, lr=0.02)
  51. y_pred = predict(np.c_[np.ones((X_test.shape[0], 1)), X_test], w) # this line[enter image description here][1]
  52. #print(f'prediction: {y_pred}')

enter image description here
enter image description here

wn9m85ua

wn9m85ua1#

您的数据中似乎有空值,您必须删除或填充它们:

  1. X, y = load_cvs('diabetes.csv')
  2. df = pd.read_csv('diabetes.csv')
  3. df = df.dropna() # <- HERE

要查找具有空值的行,请用途:

  1. X, y = load_cvs('diabetes.csv')
  2. df = pd.read_csv('diabetes.csv')
  3. nandf = df[df.isna().any(axis=1)] # <- HERE
jucafojl

jucafojl2#

  1. df.dropna(inplace=True)

我觉得你应该试试这个。

aamkag61

aamkag613#

根据错误图像,问题出在这个函数中的w上。您是否尝试将错误显示与代码匹配?

  1. def predict(X,w):
  2. z = np.dot(X,w)
  3. return sigmoid(z)

wNone,不能在数学中使用(这应该是显而易见的!)
w从何而来?

  1. w = train(X_train,y_train,epochs=1000, lr=0.02)

trainif中有一条return语句,如果if不为真,train返回什么?
这是你的代码,你有责任测试每一部分。

相关问题