How can I import all of sklearn's regressors?
Question:
I’m doing some predictive modeling and would like to benchmark different kinds of regressors in scikit-learn,
just to see what’s out there and how they perform on a given prediction task.
I got inspired to do this by this kaggle kernel in which the author essentially manually imports a bunch of classifiers (about 10) and benchmarks them.
I’m having trouble finding a comprehensive list of imports for the regressors in sklearn,
so I’m trying to automate the import
statements so that they return a list of classes that I can use.
I tried to dynamically import the classes:
from importlib import import_module
import sklearn
def all_regressors():
    """Gather exported names containing 'Regress' from each sklearn submodule.

    Every entry of ``sklearn.__all__`` is imported as ``sklearn.<entry>`` and
    its ``__all__`` list is filtered by name.  The items collected in the loop
    are the *strings* found in ``__all__``, not the objects they name; only
    the ``SVR`` entry appended at the end is an actual class.

    Returns:
        list: matching export names (str) followed by ``sklearn.svm.SVR``.
    """
    found = []
    for submodule_name in sklearn.__all__:
        try:
            exported = import_module(f'sklearn.{submodule_name}').__all__
            found.extend([name for name in exported if 'Regress' in name])
        except:
            # Kept deliberately broad: any failure to import the entry or to
            # read its __all__ simply skips that entry.
            continue
    # 'SVR' does not contain 'Regress', so it is added by hand.
    found.append(sklearn.svm.SVR)
    return found


print(all_regressors())
But I only get back the names as strings, rather than the classes:
['RandomForestRegressor', 'ExtraTreesRegressor', 'BaggingRegressor',
'GradientBoostingRegressor', 'AdaBoostRegressor',
'GaussianProcessRegressor', 'IsotonicRegression', 'ARDRegression',
'HuberRegressor', 'LinearRegression', 'LogisticRegression',
'LogisticRegressionCV', 'PassiveAggressiveRegressor',
'RandomizedLogisticRegression', 'SGDRegressor', 'TheilSenRegressor',
'RANSACRegressor', 'MultiOutputRegressor', 'KNeighborsRegressor',
'RadiusNeighborsRegressor', 'MLPRegressor', 'DecisionTreeRegressor',
'ExtraTreeRegressor', <class 'sklearn.svm.classes.SVR'>]
How can I get the actual classes?
Answers:
I figured out I had to use getattr
on the module object:
from importlib import import_module
import sklearn
def all_regressors():
    """Collect regressor classes from the public scikit-learn submodules.

    Each name in ``sklearn.__all__`` is imported as ``sklearn.<name>``; every
    entry of that submodule's ``__all__`` whose name contains ``'Regress'`` is
    resolved to the real object with ``getattr``.

    The substring test is only a naming heuristic: it also matches classifiers
    such as ``LogisticRegression`` and misses regressors whose names lack the
    substring, which is why ``SVR`` is appended explicitly.

    Returns:
        list: the matching classes, with ``sklearn.svm.SVR`` as the last item.
    """
    regressors = []
    for name in sklearn.__all__:
        try:
            submodule = import_module(f'sklearn.{name}')
            # Build the full list first so a failure part-way through leaves
            # `regressors` untouched for this submodule.
            regressors.extend(
                [getattr(submodule, attr)
                 for attr in submodule.__all__
                 if 'Regress' in attr]
            )
        except (ImportError, AttributeError):
            # ImportError: the entry is not an importable submodule.
            # AttributeError: the submodule has no __all__, or lists a name it
            # does not define.  Anything else is unexpected and propagates.
            continue
    # Import sklearn.svm explicitly instead of relying on the loop above
    # having loaded it as a side effect.
    regressors.append(import_module('sklearn.svm').SVR)
    return regressors


print(all_regressors())
[<class 'sklearn.ensemble.forest.RandomForestRegressor'>, <class
'sklearn.ensemble.forest.ExtraTreesRegressor'>, <class
'sklearn.ensemble.bagging.BaggingRegressor'>, <class
'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>,
<class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>, <class
'sklearn.gaussian_process.gpr.GaussianProcessRegressor'>, <class
'sklearn.isotonic.IsotonicRegression'>, <class
'sklearn.linear_model.bayes.ARDRegression'>, <class
'sklearn.linear_model.huber.HuberRegressor'>, <class
'sklearn.linear_model.base.LinearRegression'>, <class
'sklearn.linear_model.logistic.LogisticRegression'>, <class
'sklearn.linear_model.logistic.LogisticRegressionCV'>, <class
'sklearn.linear_model.passive_aggressive.PassiveAggressiveRegressor'>,
<class 'sklearn.linear_model.randomized_l1.RandomizedLogisticRegression'>, <class
'sklearn.linear_model.stochastic_gradient.SGDRegressor'>, <class
'sklearn.linear_model.theil_sen.TheilSenRegressor'>, <class
'sklearn.linear_model.ransac.RANSACRegressor'>, <class
'sklearn.multioutput.MultiOutputRegressor'>, <class
'sklearn.neighbors.regression.KNeighborsRegressor'>, <class
'sklearn.neighbors.regression.RadiusNeighborsRegressor'>, <class
'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>, <class
'sklearn.tree.tree.DecisionTreeRegressor'>, <class
'sklearn.tree.tree.ExtraTreeRegressor'>, <class
'sklearn.svm.classes.SVR'>]
You can use all_estimators
from sklearn.utils with type_filter='regressor':
from sklearn.utils import all_estimators
def get_all_regressors_sklearn():
    """Instantiate every scikit-learn regressor that builds with no arguments.

    ``all_estimators(type_filter='regressor')`` is iterated as ``(name, class)``
    pairs and each class is called with no arguments.  Classes whose
    construction raises are reported and skipped rather than dropped silently.

    Returns:
        list: one default-constructed instance per regressor that could be
        built.
    """
    all_regs = []
    for name, RegClass in all_estimators(type_filter='regressor'):
        try:
            reg = RegClass()
        except Exception as exc:
            # Best effort: presumably these are estimators that require
            # constructor arguments (e.g. a wrapped base estimator) -- TODO
            # confirm.  Report the skip so it is visible in the output.
            print('Skipping', name, '-', exc)
            continue
        # Only announce estimators that were actually created.
        print('Appending', name)
        all_regs.append(reg)
    return all_regs


all_regs = get_all_regressors_sklearn()
print(all_regs)
Gives:
[ARDRegression(), AdaBoostRegressor(), BaggingRegressor(), BayesianRidge(), CCA(), DecisionTreeRegressor(), DummyRegressor(), ElasticNet(), ElasticNetCV(), ExtraTreeRegressor(), ExtraTreesRegressor(), GammaRegressor(), GaussianProcessRegressor(), GradientBoostingRegressor(), HistGradientBoostingRegressor(), HuberRegressor(), IsotonicRegression(), KNeighborsRegressor(), KernelRidge(), Lars(), LarsCV(), Lasso(), LassoCV(), LassoLars(), LassoLarsCV(), LassoLarsIC(), LinearRegression(), LinearSVR(), MLPRegressor(), MultiTaskElasticNet(), MultiTaskElasticNetCV(), MultiTaskLasso(), MultiTaskLassoCV(), NuSVR(), OrthogonalMatchingPursuit(), OrthogonalMatchingPursuitCV(), PLSCanonical(), PLSRegression(), PassiveAggressiveRegressor(), PoissonRegressor(), QuantileRegressor(), RANSACRegressor(), RadiusNeighborsRegressor(), RandomForestRegressor(), Ridge(), RidgeCV(), SGDRegressor(), SVR(), TheilSenRegressor(), TransformedTargetRegressor(), TweedieRegressor()]
I’m doing some predictive modeling and would like to benchmark different kinds of regressors in scikit-learn,
just to see what’s out there and how they perform on a given prediction task.
I got inspired to do this by this kaggle kernel in which the author essentially manually imports a bunch of classifiers (about 10) and benchmarks them.
I’m having trouble finding a comprehensive list of imports for the regressors in sklearn,
so I’m trying to automate the import
statements so that they return a list of classes that I can use.
I tried to dynamically import the classes:
from importlib import import_module
import sklearn
def all_regressors():
    """Gather exported names containing 'Regress' from each sklearn submodule.

    Every entry of ``sklearn.__all__`` is imported as ``sklearn.<entry>`` and
    its ``__all__`` list is filtered by name.  The items collected in the loop
    are the *strings* found in ``__all__``, not the objects they name; only
    the ``SVR`` entry appended at the end is an actual class.

    Returns:
        list: matching export names (str) followed by ``sklearn.svm.SVR``.
    """
    found = []
    for submodule_name in sklearn.__all__:
        try:
            exported = import_module(f'sklearn.{submodule_name}').__all__
            found.extend([name for name in exported if 'Regress' in name])
        except:
            # Kept deliberately broad: any failure to import the entry or to
            # read its __all__ simply skips that entry.
            continue
    # 'SVR' does not contain 'Regress', so it is added by hand.
    found.append(sklearn.svm.SVR)
    return found


print(all_regressors())
But I only get back the names as strings, rather than the classes:
['RandomForestRegressor', 'ExtraTreesRegressor', 'BaggingRegressor',
'GradientBoostingRegressor', 'AdaBoostRegressor',
'GaussianProcessRegressor', 'IsotonicRegression', 'ARDRegression',
'HuberRegressor', 'LinearRegression', 'LogisticRegression',
'LogisticRegressionCV', 'PassiveAggressiveRegressor',
'RandomizedLogisticRegression', 'SGDRegressor', 'TheilSenRegressor',
'RANSACRegressor', 'MultiOutputRegressor', 'KNeighborsRegressor',
'RadiusNeighborsRegressor', 'MLPRegressor', 'DecisionTreeRegressor',
'ExtraTreeRegressor', <class 'sklearn.svm.classes.SVR'>]
How can I get the actual classes?
I figured out I had to use getattr
on the module object:
from importlib import import_module
import sklearn
def all_regressors():
    """Collect regressor classes from the public scikit-learn submodules.

    Each name in ``sklearn.__all__`` is imported as ``sklearn.<name>``; every
    entry of that submodule's ``__all__`` whose name contains ``'Regress'`` is
    resolved to the real object with ``getattr``.

    The substring test is only a naming heuristic: it also matches classifiers
    such as ``LogisticRegression`` and misses regressors whose names lack the
    substring, which is why ``SVR`` is appended explicitly.

    Returns:
        list: the matching classes, with ``sklearn.svm.SVR`` as the last item.
    """
    regressors = []
    for name in sklearn.__all__:
        try:
            submodule = import_module(f'sklearn.{name}')
            # Build the full list first so a failure part-way through leaves
            # `regressors` untouched for this submodule.
            regressors.extend(
                [getattr(submodule, attr)
                 for attr in submodule.__all__
                 if 'Regress' in attr]
            )
        except (ImportError, AttributeError):
            # ImportError: the entry is not an importable submodule.
            # AttributeError: the submodule has no __all__, or lists a name it
            # does not define.  Anything else is unexpected and propagates.
            continue
    # Import sklearn.svm explicitly instead of relying on the loop above
    # having loaded it as a side effect.
    regressors.append(import_module('sklearn.svm').SVR)
    return regressors


print(all_regressors())
[<class 'sklearn.ensemble.forest.RandomForestRegressor'>, <class
'sklearn.ensemble.forest.ExtraTreesRegressor'>, <class
'sklearn.ensemble.bagging.BaggingRegressor'>, <class
'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>,
<class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>, <class
'sklearn.gaussian_process.gpr.GaussianProcessRegressor'>, <class
'sklearn.isotonic.IsotonicRegression'>, <class
'sklearn.linear_model.bayes.ARDRegression'>, <class
'sklearn.linear_model.huber.HuberRegressor'>, <class
'sklearn.linear_model.base.LinearRegression'>, <class
'sklearn.linear_model.logistic.LogisticRegression'>, <class
'sklearn.linear_model.logistic.LogisticRegressionCV'>, <class
'sklearn.linear_model.passive_aggressive.PassiveAggressiveRegressor'>,
<class 'sklearn.linear_model.randomized_l1.RandomizedLogisticRegression'>, <class
'sklearn.linear_model.stochastic_gradient.SGDRegressor'>, <class
'sklearn.linear_model.theil_sen.TheilSenRegressor'>, <class
'sklearn.linear_model.ransac.RANSACRegressor'>, <class
'sklearn.multioutput.MultiOutputRegressor'>, <class
'sklearn.neighbors.regression.KNeighborsRegressor'>, <class
'sklearn.neighbors.regression.RadiusNeighborsRegressor'>, <class
'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>, <class
'sklearn.tree.tree.DecisionTreeRegressor'>, <class
'sklearn.tree.tree.ExtraTreeRegressor'>, <class
'sklearn.svm.classes.SVR'>]
You can use all_estimators
from sklearn.utils with type_filter='regressor':
from sklearn.utils import all_estimators
def get_all_regressors_sklearn():
    """Instantiate every scikit-learn regressor that builds with no arguments.

    ``all_estimators(type_filter='regressor')`` is iterated as ``(name, class)``
    pairs and each class is called with no arguments.  Classes whose
    construction raises are reported and skipped rather than dropped silently.

    Returns:
        list: one default-constructed instance per regressor that could be
        built.
    """
    all_regs = []
    for name, RegClass in all_estimators(type_filter='regressor'):
        try:
            reg = RegClass()
        except Exception as exc:
            # Best effort: presumably these are estimators that require
            # constructor arguments (e.g. a wrapped base estimator) -- TODO
            # confirm.  Report the skip so it is visible in the output.
            print('Skipping', name, '-', exc)
            continue
        # Only announce estimators that were actually created.
        print('Appending', name)
        all_regs.append(reg)
    return all_regs


all_regs = get_all_regressors_sklearn()
print(all_regs)
Gives:
[ARDRegression(), AdaBoostRegressor(), BaggingRegressor(), BayesianRidge(), CCA(), DecisionTreeRegressor(), DummyRegressor(), ElasticNet(), ElasticNetCV(), ExtraTreeRegressor(), ExtraTreesRegressor(), GammaRegressor(), GaussianProcessRegressor(), GradientBoostingRegressor(), HistGradientBoostingRegressor(), HuberRegressor(), IsotonicRegression(), KNeighborsRegressor(), KernelRidge(), Lars(), LarsCV(), Lasso(), LassoCV(), LassoLars(), LassoLarsCV(), LassoLarsIC(), LinearRegression(), LinearSVR(), MLPRegressor(), MultiTaskElasticNet(), MultiTaskElasticNetCV(), MultiTaskLasso(), MultiTaskLassoCV(), NuSVR(), OrthogonalMatchingPursuit(), OrthogonalMatchingPursuitCV(), PLSCanonical(), PLSRegression(), PassiveAggressiveRegressor(), PoissonRegressor(), QuantileRegressor(), RANSACRegressor(), RadiusNeighborsRegressor(), RandomForestRegressor(), Ridge(), RidgeCV(), SGDRegressor(), SVR(), TheilSenRegressor(), TransformedTargetRegressor(), TweedieRegressor()]