我有一个关于糖尿病患者的数据集,有很多示例。
每个示例都用特定的类(二进制,0或1)进行分类(标记)
我在尝试预测Y ^时遇到了问题
但我有问题
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
def load_cvs(filename):
data = []
labels = []
with open(filename, 'r') as f:
for line in f:
items = line.split(",")
data.append([float(items[0]),float(items[1]),float(items[2]),float(items[3]),float(items[4]),float(items[5])])
labels.append(int(items[6]))
return np.array(data), np.array(labels)
X,y = load_cvs('diabetes.csv')
df = pd.read_csv("diabetes.csv")
#Glucose;BloodPressure;SkinThickness;Insulin;BMI;Age
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2, random_state=33)
def sigmoid(z):
return 1 / (1 + np.exp(-z))
def predict(X,w):
z = np.dot(X,w)
return sigmoid(z)
def cost(y , y_pred):
return -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))
def train(X, y, epochs = 1000, lr =0.02):
X = np.c_[np.ones((X.shape[0], 1)), X]
w = np.random.randn(X.shape[1])
for epoch in range(epochs):
y_pred = predict(X,w)
error = y_pred - y
gradient = np.dot(X.T, error) / y.size
w -= lr * gradient
if epoch % 100 == 0:
c = cost(y,y_pred)
print(f'Epoch{epoch}: cost = {c}')
return w
w = train(X_train,y_train,epochs=1000, lr=0.02)
X_test=X_test
y_pred = predict(np.c_[np.ones((X_test.shape[0], 1)), X_test], w)
我收到此错误消息:TypeError: unsupported operand type(s) for *: 'float' and 'NoneType'
我更改了代码,但仍有问题
import numpy as np
from random import seed
from random import randrange
from sklearn.model_selection import train_test_split
from csv import reader
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from matplotlib import pyplot as plt
import seaborn as sns
def load_cvs(filename):
data = []
labels = []
with open(filename, 'r') as f:
for line in f:
items = line.split(",")
data.append([float(items[0]),float(items[1]),float(items[2]),float(items[3]),float(items[4]),float(items[5])])
labels.append(int(items[6]))
return np.array(data), np.array(labels)
seed(1)
col_names = ['Glucose','BloodPressure','SkinThickness','Insulin','BMI','Age','Outcome']
df = pd.read_csv('diabetes.csv', header=None, names=col_names)
nandf = df.fillna(value=0)
#nandf = df[df.isna().any(axis=1)]
#nandf.head()
feature_cols = ['Glucose','BloodPressure','SkinThickness','Insulin','BMI','Age']
X = nandf[feature_cols] # Features
y = nandf.Outcome # Target variable
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2, random_state=16)
def sigmoid(z):
return 1 / (1 + np.exp(-z))
def predict(X,w):
z = np.dot(X,w)
return sigmoid(z)
def cost(y , y_pred):
return -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))
def train(X, y, epochs = 1000, lr =0.02):
X = np.c_[np.ones((X.shape[0], 1)), X]
w = np.random.randn(X.shape[1])
for epoch in range(epochs):
y_pred = predict(X,w)
error = y_pred - y
gradient = np.dot(X.T, error) / y.size
w -= lr * gradient
if epoch % 100 == 0:
c = cost(y,y_pred)
print(f'Epoch{epoch}: cost = {c}')
return w
w = train(X_train,y_train,epochs=1000, lr=0.02)
y_pred = predict(np.c_[np.ones((X_test.shape[0], 1)), X_test], w) # this line[enter image description here][1]
#print(f'prediction: {y_pred}')
3条答案
按热度按时间wn9m85ua1#
您的数据中似乎有空值,您必须删除或填充它们:
要查找具有空值的行,请用途:
jucafojl2#
我觉得你应该试试这个。
aamkag613#
根据错误图像,问题出在这个函数中的
w
上。您是否尝试将错误显示与代码匹配?w
是None
,不能在数学中使用(这应该是显而易见的!)w
从何而来?train
在if
中有一条return
语句,如果if
不为真,train
返回什么?这是你的代码,你有责任测试每一部分。