我正在使用SMOTE对一些数据进行重新采样,并收到如下错误:
属性错误:'NoneType'对象没有'split'属性
我的代码:
sm = SMOTE(random_state = 42)
X_train_resampled, y_train_resampled = sm.fit_resample(X_train_final, y_train)
有人能帮我解决这个问题吗?因为我的数据看起来没有任何问题。
完整的错误信息:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-49-9465f7b6ac21> in <module>
1 #resample data using SMOTE
2 sm = SMOTE(random_state = 42)
----> 3 X_train_resampled, y_train_resampled = sm.fit_resample(X_train_final, y_train)
~\AppData\Roaming\Python\Python38\site-packages\imblearn\base.py in fit_resample(self, X, y)
81 )
82
---> 83 output = self._fit_resample(X, y)
84
85 y_ = (
~\AppData\Roaming\Python\Python38\site-packages\imblearn\over_sampling\_smote\base.py in _fit_resample(self, X, y)
322
323 self.nn_k_.fit(X_class)
--> 324 nns = self.nn_k_.kneighbors(X_class, return_distance=False)[:, 1:]
325 X_new, y_new = self._make_samples(
326 X_class, y.dtype, class_sample, X_class, nns, n_samples, 1.0
~\AppData\Roaming\Python\Python38\site-packages\sklearn\neighbors\_base.py in kneighbors(self, X, n_neighbors, return_distance)
761 )
762 if use_pairwise_distances_reductions:
--> 763 results = PairwiseDistancesArgKmin.compute(
764 X=X,
765 Y=self._fit_X,
sklearn\metrics\_pairwise_distances_reduction.pyx in sklearn.metrics._pairwise_distances_reduction.PairwiseDistancesArgKmin.compute()
~\AppData\Roaming\Python\Python38\site-packages\sklearn\utils\fixes.py in threadpool_limits(limits, user_api)
149 return controller.limit(limits=limits, user_api=user_api)
150 else:
--> 151 return threadpoolctl.threadpool_limits(limits=limits, user_api=user_api)
152
153
C:\ProgramData\Anaconda3\lib\site-packages\threadpoolctl.py in __init__(self, limits, user_api)
169 self._check_params(limits, user_api)
170
--> 171 self._original_info = self._set_threadpool_limits()
172
173 def __enter__(self):
C:\ProgramData\Anaconda3\lib\site-packages\threadpoolctl.py in _set_threadpool_limits(self)
266 return None
267
--> 268 modules = _ThreadpoolInfo(prefixes=self._prefixes,
269 user_api=self._user_api)
270 for module in modules:
C:\ProgramData\Anaconda3\lib\site-packages\threadpoolctl.py in __init__(self, user_api, prefixes, modules)
338
339 self.modules = []
--> 340 self._load_modules()
341 self._warn_if_incompatible_openmp()
342 else:
C:\ProgramData\Anaconda3\lib\site-packages\threadpoolctl.py in _load_modules(self)
371 self._find_modules_with_dyld()
372 elif sys.platform == "win32":
--> 373 self._find_modules_with_enum_process_module_ex()
374 else:
375 self._find_modules_with_dl_iterate_phdr()
C:\ProgramData\Anaconda3\lib\site-packages\threadpoolctl.py in _find_modules_with_enum_process_module_ex(self)
483
484 # Store the module if it is supported and selected
--> 485 self._make_module_from_path(filepath)
486 finally:
487 kernel_32.CloseHandle(h_process)
C:\ProgramData\Anaconda3\lib\site-packages\threadpoolctl.py in _make_module_from_path(self, filepath)
513 if prefix in self.prefixes or user_api in self.user_api:
514 module_class = globals()[module_class]
--> 515 module = module_class(filepath, prefix, user_api, internal_api)
516 self.modules.append(module)
517
C:\ProgramData\Anaconda3\lib\site-packages\threadpoolctl.py in __init__(self, filepath, prefix, user_api, internal_api)
604 self.internal_api = internal_api
605 self._dynlib = ctypes.CDLL(filepath, mode=_RTLD_NOLOAD)
--> 606 self.version = self.get_version()
607 self.num_threads = self.get_num_threads()
608 self._get_extra_info()
C:\ProgramData\Anaconda3\lib\site-packages\threadpoolctl.py in get_version(self)
644 lambda: None)
645 get_config.restype = ctypes.c_char_p
--> 646 config = get_config().split()
647 if config[0] == b"OpenBLAS":
648 return config[1].decode("utf-8")
AttributeError: 'NoneType' object has no attribute 'split'
我试着进一步研究我的数据,但我似乎找不到任何问题。
2条答案
按热度按时间rwqw0loc1#
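我发现SMOTE无法处理超过15列的数据框。更多的列会导致SMOTE出现问题,并引发'NoneType'异常。您必须安装threadpoolctl包来解决此问题。(注:从上面的回溯可以看到threadpoolctl已经安装,出错的正是其旧版本中的get_version();因此实际需要的是把它升级到较新的版本,例如执行 pip install --upgrade threadpoolctl。)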
nxowjjhe2#
我并不是在提供上述问题的根本解决方案(我在使用SMOTE时也遇到过这个问题),而是提供一个绕过这个问题的小建议。
您可以使用其他与SMOTE相关的方法,如BorderlineSMOTE等。在我的案例中,使用BorderlineSMOTE解决了问题。
此外,我觉得BorderlineSMOTE的效果稍微好一点,因为普通的SMOTE可能会有一些问题。你可以参考以下资料了解更多信息:
1. imbalanced-learn文档示例:比较各种过采样器(Compare over-sampling samplers)
我希望它能提供一些帮助。