下面的代码在 Google Colab 上执行需要 5.0 分钟,而在我的机器上执行大约需要 3.0 分钟。在我测试的所有其他任务(机器学习或其他)中,Colab 都以 50-100% 的优势击败了我的机器。我尝试过安装不同版本的 sklearn、使用 GPU 运行,还尝试过调整 n_jobs 的值,但运行时间要么变慢,要么保持不变。
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.feature_selection import RFE
from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV, KFold
from datetime import datetime
# Search space for RandomizedSearchCV: a list of two dictionaries, one per
# model family. Keys follow the Pipeline "<step>__<parameter>" convention and
# every value is a list of candidate settings. The 'feature_selection',
# 'scaling' and 'classification' keys swap in whole step objects.
param_grid = [
    # Family 1: gradient boosting ranks features inside RFE and also serves
    # as the final classifier.
    {
        'feature_selection': [
            RFE(estimator=GradientBoostingClassifier(random_state=0)),
        ],
        'feature_selection__n_features_to_select': [2],
        'scaling': [StandardScaler()],
        'classification': [GradientBoostingClassifier(random_state=0)],
        'classification__n_estimators': [100, 500],
        # 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3. For
        # GradientBoostingClassifier it meant sqrt(n_features), so 'sqrt'
        # keeps the same behaviour on every supported release.
        # NOTE(review): with only 2 features selected upstream, 'sqrt' and
        # 'log2' presumably resolve to the same value - confirm whether both
        # candidates are worth searching.
        'classification__max_features': ['sqrt', 'log2'],
        'classification__max_depth': [2, 4],
        'classification__learning_rate': [0.01],
        'classification__loss': ['exponential'],
        'classification__min_samples_split': [2, 200],
        'classification__min_samples_leaf': [1, 20],
    },
    # Family 2: L1-penalised logistic regression for both feature ranking
    # and classification.
    {
        'feature_selection': [
            RFE(estimator=LogisticRegression(random_state=0)),
        ],
        'feature_selection__n_features_to_select': [2],
        'scaling': [StandardScaler()],
        'classification': [LogisticRegression(random_state=0)],
        'classification__C': [0.1, 100, 1000],
        'classification__penalty': ['l1'],
        'classification__solver': ['liblinear'],
    },
]
# Base three-step pipeline: scale -> recursive feature elimination -> classify.
# The step names ('scaling', 'feature_selection', 'classification') are the
# prefixes that the keys in param_grid refer to; param_grid also supplies
# replacement objects for each of these steps.
scaling_step = ('scaling', StandardScaler())
selection_step = (
    'feature_selection',
    RFE(estimator=GradientBoostingClassifier()),
)
classification_step = ('classification', GradientBoostingClassifier())
pipe = Pipeline(steps=[scaling_step, selection_step, classification_step])
# Cross-validation splitter. Shuffling is seeded so that the folds, and with
# them the scores and timings, are the same on every run and every machine;
# the rest of the script already pins random_state=0.
cv_splitter = KFold(shuffle=True, random_state=0)

# Randomized hyper-parameter search over `pipe`, scored with the negated
# Brier score. n_jobs=-1 asks joblib to use every available core, so
# wall-clock time depends on the core count of the host.
grid_obj = RandomizedSearchCV(
    estimator=pipe,
    param_distributions=param_grid,
    scoring='neg_brier_score',
    cv=cv_splitter,
    random_state=0,
    return_train_score=True,
    n_jobs=-1,
    verbose=10,
)

# Fit the search on the breast-cancer data set bundled with scikit-learn.
X, y = load_breast_cancer(return_X_y=True)
grid_obj.fit(X, y)
Google Colab 结果:
# [Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
# [Parallel(n_jobs=-1)]: Done 1 tasks | elapsed: 13.3s
# [Parallel(n_jobs=-1)]: Done 4 tasks | elapsed: 25.8s
# [Parallel(n_jobs=-1)]: Done 9 tasks | elapsed: 1.0min
# [Parallel(n_jobs=-1)]: Done 14 tasks | elapsed: 1.4min
# [Parallel(n_jobs=-1)]: Done 21 tasks | elapsed: 2.2min
# [Parallel(n_jobs=-1)]: Done 28 tasks | elapsed: 2.8min
# [Parallel(n_jobs=-1)]: Done 37 tasks | elapsed: 3.8min
# [Parallel(n_jobs=-1)]: Done 46 tasks | elapsed: 4.6min
# [Parallel(n_jobs=-1)]: Done 50 out of 50 | elapsed: 5.0min finished
暂无答案!
目前还没有任何答案,快来回答吧!