Hi Piotr:
Cross-validation no longer works (with or without shuffle). For instance:
automl._validation = {"validation_type": "kfold", "k_folds": 15, "shuffle": False, "stratify": True}
This used to work, but it now generates the following error:
AutoML task to be solved: binary_classification
AutoML will use algorithms: ['Xgboost']
AutoML will optimize for metric: logloss
AutoML will try to check about 28 models
Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.
ValueError Traceback (most recent call last)
in
~/anaconda3/envs/mlj_shap_2/lib/python3.6/site-packages/supervised/automl.py in fit(self, X_train, y_train, X_validation, y_validation)
520
521 for params in generated_params:
--> 522 self.train_model(params)
523 # hill climbing
524 for params in tuner.get_hill_climbing_params(self._models):
~/anaconda3/envs/mlj_shap_2/lib/python3.6/site-packages/supervised/automl.py in train_model(self, params)
262 raise AutoMLException(f"Cannot create directory {model_path}")
263
--> 264 mf.train() # {"train": {"X": X, "y": y}})
265
266 mf.save(model_path)
~/anaconda3/envs/mlj_shap_2/lib/python3.6/site-packages/supervised/model_framework.py in train(self)
107 np.random.seed(self.learner_params["seed"])
108
--> 109 self.validation = ValidationStep(self.validation_params)
110
111 for k_fold in range(self.validation.get_n_splits()):
~/anaconda3/envs/mlj_shap_2/lib/python3.6/site-packages/supervised/validation/validation_step.py in __init__(self, params)
21
22 if self.validation_type == "kfold":
---> 23 self.validator = KFoldValidator(params)
24 else:
25 raise Exception("Other validation types are not implemented yet!")
~/anaconda3/envs/mlj_shap_2/lib/python3.6/site-packages/supervised/validation/validator_kfold.py in __init__(self, params)
57
58 for fold_cnt, (train_index, validation_index) in enumerate(
---> 59 self.skf.split(X, y)
60 ):
61
~/anaconda3/envs/mlj_shap_2/lib/python3.6/site-packages/sklearn/model_selection/_split.py in split(self, X, y, groups)
728 to an integer.
729 """
--> 730 y = check_array(y, ensure_2d=False, dtype=None)
731 return super().split(X, y, groups)
732
~/anaconda3/envs/mlj_shap_2/lib/python3.6/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
576 if force_all_finite:
577 _assert_all_finite(array,
--> 578 allow_nan=force_all_finite == 'allow-nan')
579
580 if ensure_min_samples > 0:
~/anaconda3/envs/mlj_shap_2/lib/python3.6/site-packages/sklearn/utils/validation.py in _assert_all_finite(X, allow_nan, msg_dtype)
58 msg_err.format
59 (type_err,
---> 60 msg_dtype if msg_dtype is not None else X.dtype)
61 )
62 # for object dtype data, we only check for NaNs (GH-13254)
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
Setting `automl._validation = {"validation_type": "kfold", "k_folds": 15, "shuffle": True, "stratify": True}` (i.e. with shuffle enabled) also generates an error:
AutoML task to be solved: binary_classification
AutoML will use algorithms: ['Xgboost']
AutoML will optimize for metric: logloss
AutoML will try to check about 28 models
ValueError Traceback (most recent call last)
in
~/anaconda3/envs/mlj_shap_2/lib/python3.6/site-packages/supervised/automl.py in fit(self, X_train, y_train, X_validation, y_validation)
520
521 for params in generated_params:
--> 522 self.train_model(params)
523 # hill climbing
524 for params in tuner.get_hill_climbing_params(self._models):
~/anaconda3/envs/mlj_shap_2/lib/python3.6/site-packages/supervised/automl.py in train_model(self, params)
262 raise AutoMLException(f"Cannot create directory {model_path}")
263
--> 264 mf.train() # {"train": {"X": X, "y": y}})
265
266 mf.save(model_path)
~/anaconda3/envs/mlj_shap_2/lib/python3.6/site-packages/supervised/model_framework.py in train(self)
107 np.random.seed(self.learner_params["seed"])
108
--> 109 self.validation = ValidationStep(self.validation_params)
110
111 for k_fold in range(self.validation.get_n_splits()):
~/anaconda3/envs/mlj_shap_2/lib/python3.6/site-packages/supervised/validation/validation_step.py in __init__(self, params)
21
22 if self.validation_type == "kfold":
---> 23 self.validator = KFoldValidator(params)
24 else:
25 raise Exception("Other validation types are not implemented yet!")
~/anaconda3/envs/mlj_shap_2/lib/python3.6/site-packages/supervised/validation/validator_kfold.py in __init__(self, params)
57
58 for fold_cnt, (train_index, validation_index) in enumerate(
---> 59 self.skf.split(X, y)
60 ):
61
~/anaconda3/envs/mlj_shap_2/lib/python3.6/site-packages/sklearn/model_selection/_split.py in split(self, X, y, groups)
728 to an integer.
729 """
--> 730 y = check_array(y, ensure_2d=False, dtype=None)
731 return super().split(X, y, groups)
732
~/anaconda3/envs/mlj_shap_2/lib/python3.6/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
576 if force_all_finite:
577 _assert_all_finite(array,
--> 578 allow_nan=force_all_finite == 'allow-nan')
579
580 if ensure_min_samples > 0:
~/anaconda3/envs/mlj_shap_2/lib/python3.6/site-packages/sklearn/utils/validation.py in _assert_all_finite(X, allow_nan, msg_dtype)
58 msg_err.format
59 (type_err,
---> 60 msg_dtype if msg_dtype is not None else X.dtype)
61 )
62 # for object dtype data, we only check for NaNs (GH-13254)
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
Thanks.