我得到错误AttributeError: 'LogisticRegression' object has no attribute 'feature_names_in_'
,即使该属性写为in the docs。
我用的是scikit-learn
版本。
我创建了一个对象LogisticRegression
,并试图使用feature_names_in_
的文档属性,但它返回了一个错误。
#imports
import numpy as np
import pandas as pd
import statistics
import scipy.sparse
from scipy.stats import chi2_contingency
from sklearn.preprocessing import FunctionTransformer, MinMaxScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
# train_test_split()
X_train, X_test, y_train, y_test = train_test_split(features, labels, random_state = 42)
#create functions for preprocessing
# function to replace NaN's in the ordinal and interval data
def replace_NAN_median(X_df):
opinions = ['opinion_seas_vacc_effective', 'opinion_seas_risk', 'opinion_seas_sick_from_vacc', 'household_adults',
'household_children']
for column in opinions:
X_df[column].replace(np.nan, X_df[column].median(), inplace = True)
return X_df
# function to replace NaN's in the catagorical data
def replace_NAN_mode(X_df):
miss_cat_features = ['education', 'income_poverty', 'marital_status', 'rent_or_own', 'employment_status']
for column in miss_cat_features:
X_df[column].replace(np.nan, statistics.mode(X_df[column]), inplace = True)
return X_df
# Instantiate transformers
NAN_median = FunctionTransformer(replace_NAN_median)
NAN_mode = FunctionTransformer(replace_NAN_mode)
col_transformer = ColumnTransformer(transformers=
# replace NaN's in the binary data
[("NAN_0", SimpleImputer(missing_values=np.nan, strategy='constant', fill_value = 0),
['behavioral_antiviral_meds', 'behavioral_avoidance','behavioral_face_mask' ,
'behavioral_wash_hands', 'behavioral_large_gatherings', 'behavioral_outside_home',
'behavioral_touch_face', 'doctor_recc_seasonal', 'chronic_med_condition',
'child_under_6_months', 'health_worker', 'health_insurance']),
# MinMaxScaler on our numeric ordinal and interval data
("scaler", MinMaxScaler(), ['opinion_seas_vacc_effective', 'opinion_seas_risk',
'opinion_seas_sick_from_vacc',
'household_adults', 'household_children']),
# OHE catagorical string data
("ohe", OneHotEncoder(sparse = False), ['age_group','education', 'race', 'sex',
'income_poverty', 'marital_status', 'rent_or_own',
'employment_status', 'census_msa'])],
remainder="passthrough")
# Preprocessing Pipeline
preprocessing_pipe = Pipeline(steps=[
("NAN_median", NAN_median),
("NAN_mode", NAN_mode),
("col_transformer", col_transformer)
])
# model
logreg_optimized_pipe = Pipeline(steps=[("preprocessing_pipe", preprocessing_pipe),
("log_reg", LogisticRegression(solver = 'liblinear', random_state = 42, C = 10, penalty= 'l1'))])
#fit model to training data
logreg_optimized_pipe.fit(X_train, y_train)
#trying to get feature names
logreg_optimized_pipe.named_steps["log_reg"].feature_names_in_
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-38-512bfaf5962d> in <module>
----> 1 logreg_optimized_pipe.named_steps["log_reg"].feature_names_in_
AttributeError: 'LogisticRegression' object has no attribute 'feature_names_in_'
我也愿意接受关于如何获得特性名称的其他建议。
1条答案
按热度按时间qacovj5a1#
Docs说明如下:
feature_names_in_ndarray of shape(n_features_in_,)拟合过程中看到的特征名称。仅在X的特征名称全部为字符串时定义。
应确保到达模型数据在中具有名称此外,仅在调用fit时才定义该数据
链接到您的版本1.0.2 LogisticRegression的文档