Hi, I am running into issues when I try to run AnyClassifier on OpenML tasks.
So far I have encountered the following examples:
- Getting a ValueError only when I run on the OpenML task, not when I run on the same dataset locally.
task = openml.tasks.get_task(15)
clf = make_pipeline(dabl.models.AnyClassifier(force_exhaust_budget=False))
run = openml.runs.run_model_on_task(clf, task)
best classifier: SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape='ovr', degree=3, gamma=0.03162277660168379,
kernel='rbf', max_iter=-1, probability=False, random_state=1,
shrinking=True, tol=0.001, verbose=False)
best score: 0.964
best classifier: HistGradientBoostingClassifier(l2_regularization=0.0001, learning_rate=0.1,
loss='auto', max_bins=16, max_depth=7,
max_iter=200, max_leaf_nodes=4,
min_samples_leaf=4, n_iter_no_change=None,
random_state=7320, scoring=None, tol=1e-07,
validation_fraction=0.1, verbose=0)
best score: 0.952
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-12-d9533783e081> in <module>
9 # run clf on the task
10 print('Run clf on the task')
---> 11 run = openml.runs.run_model_on_task(clf, task)
12
13 # print feedbackack
/miniconda3/lib/python3.7/site-packages/openml/runs/functions.py in run_model_on_task(model, task, avoid_duplicate_runs, flow_tags, seed, add_local_measures, upload_flow, return_flow)
104 seed=seed,
105 add_local_measures=add_local_measures,
--> 106 upload_flow=upload_flow,
107 )
108 if return_flow:
/miniconda3/lib/python3.7/site-packages/openml/runs/functions.py in run_flow_on_task(flow, task, avoid_duplicate_runs, flow_tags, seed, add_local_measures, upload_flow)
220 task=task,
221 extension=flow.extension,
--> 222 add_local_measures=add_local_measures,
223 )
224
/miniconda3/lib/python3.7/site-packages/openml/runs/functions.py in _run_task_get_arffcontent(flow, model, task, extension, add_local_measures)
444 rep_no=rep_no,
445 fold_no=fold_no,
--> 446 X_test=test_x,
447 )
448 if trace is not None:
/miniconda3/lib/python3.7/site-packages/openml/extensions/sklearn/extension.py in _run_model_on_fold(self, model, task, X_train, rep_no, fold_no, y_train, X_test)
1356
1357 if isinstance(task, OpenMLSupervisedTask):
-> 1358 model_copy.fit(X_train, y_train)
1359 elif isinstance(task, OpenMLClusteringTask):
1360 model_copy.fit(X_train)
/miniconda3/lib/python3.7/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
354 self._log_message(len(self.steps) - 1)):
355 if self._final_estimator != 'passthrough':
--> 356 self._final_estimator.fit(Xt, y, **fit_params)
357 return self
358
~/Google Drive/STUDY/Columbia/Research/dabl/dabl/models.py in fit(self, X, y, target_col)
351 scoring='recall_macro')
352 self.search_ = gs
--> 353 gs.fit(X, y)
354 self.est_ = gs.best_estimator_
355
~/Google Drive/STUDY/Columbia/Research/dabl/dabl/search.py in fit(self, X, y, groups, **fit_params)
132 groups=groups,
133 )
--> 134 super().fit(X, y=y, groups=groups, **fit_params)
135 # Set best_score_: BaseSearchCV does not set it, as refit is a callable
136 self.best_score_ = (
~/Google Drive/STUDY/Columbia/Research/dabl/dabl/_search.py in fit(self, X, y, groups, **fit_params)
342 return results
343
--> 344 self._run_search(evaluate_candidates, X, y, groups)
345
346 # For multi-metric evaluation, store the best_index_, best_params_ and
~/Google Drive/STUDY/Columbia/Research/dabl/dabl/search.py in _run_search(self, evaluate_candidates, X, y, groups)
232 'r_i': [r_i] * n_candidates}
233 results = evaluate_candidates(candidate_params, X_iter, y_iter,
--> 234 groups, more_results=more_results)
235
236 n_candidates_to_keep = ceil(n_candidates / self.ratio)
~/Google Drive/STUDY/Columbia/Research/dabl/dabl/_search.py in evaluate_candidates(candidate_params, X, y, groups, more_results)
316 for parameters, (train, test)
317 in product(candidate_params,
--> 318 cv.split(X, y, groups)))
319
320 if len(out) < 1:
/miniconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable)
922 self._iterating = self._original_iterator is not None
923
--> 924 while self.dispatch_one_batch(iterator):
925 pass
926
/miniconda3/lib/python3.7/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
/miniconda3/lib/python3.7/site-packages/joblib/parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
/miniconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
/miniconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
/miniconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
/miniconda3/lib/python3.7/site-packages/joblib/parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
/miniconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
552 fit_time = time.time() - start_time
553 # _score will return dict if is_multimetric is True
--> 554 test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
555 score_time = time.time() - start_time - fit_time
556 if return_train_score:
/miniconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in _score(estimator, X_test, y_test, scorer, is_multimetric)
595 """
596 if is_multimetric:
--> 597 return _multimetric_score(estimator, X_test, y_test, scorer)
598 else:
599 if y_test is None:
/miniconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in _multimetric_score(estimator, X_test, y_test, scorers)
625 score = scorer(estimator, X_test)
626 else:
--> 627 score = scorer(estimator, X_test, y_test)
628
629 if hasattr(score, 'item'):
/miniconda3/lib/python3.7/site-packages/sklearn/metrics/scorer.py in __call__(self, estimator, X, y_true, sample_weight)
88 """
89
---> 90 y_pred = estimator.predict(X)
91 if sample_weight is not None:
92 return self._sign * self._score_func(y_true, y_pred,
/miniconda3/lib/python3.7/site-packages/sklearn/utils/metaestimators.py in <lambda>(*args, **kwargs)
114
115 # lambda, but not partial, allows help() to work with update_wrapper
--> 116 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
117 # update the docstring of the returned function
118 update_wrapper(out, self.fn)
/miniconda3/lib/python3.7/site-packages/sklearn/pipeline.py in predict(self, X, **predict_params)
420 for _, name, transform in self._iter(with_final=False):
421 Xt = transform.transform(Xt)
--> 422 return self.steps[-1][-1].predict(Xt, **predict_params)
423
424 @if_delegate_has_method(delegate='_final_estimator')
/miniconda3/lib/python3.7/site-packages/sklearn/svm/base.py in predict(self, X)
572 Class labels for samples in X.
573 """
--> 574 y = super().predict(X)
575 return self.classes_.take(np.asarray(y, dtype=np.intp))
576
/miniconda3/lib/python3.7/site-packages/sklearn/svm/base.py in predict(self, X)
320 y_pred : array, shape (n_samples,)
321 """
--> 322 X = self._validate_for_predict(X)
323 predict = self._sparse_predict if self._sparse else self._dense_predict
324 return predict(X)
/miniconda3/lib/python3.7/site-packages/sklearn/svm/base.py in _validate_for_predict(self, X)
452
453 X = check_array(X, accept_sparse='csr', dtype=np.float64, order="C",
--> 454 accept_large_sparse=False)
455 if self._sparse and not sp.isspmatrix(X):
456 X = sp.csr_matrix(X)
/miniconda3/lib/python3.7/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
540 if force_all_finite:
541 _assert_all_finite(array,
--> 542 allow_nan=force_all_finite == 'allow-nan')
543
544 if ensure_min_samples > 0:
/miniconda3/lib/python3.7/site-packages/sklearn/utils/validation.py in _assert_all_finite(X, allow_nan)
54 not allow_nan and not np.isfinite(X).all()):
55 type_err = 'infinity' if allow_nan else 'NaN, infinity'
---> 56 raise ValueError(msg_err.format(type_err, X.dtype))
57 # for object dtype data, we only check for NaNs (GH-13254)
58 elif X.dtype == np.dtype('object') and not allow_nan:
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
- Getting an IndexError when features are filtered out by the near_constant_threshold.
task = openml.tasks.get_task(3)
clf = make_pipeline(dabl.models.AnyClassifier(force_exhaust_budget=False))
run = openml.runs.run_model_on_task(clf, task)
/Users/hp2500/Google Drive/STUDY/Columbia/Research/dabl/dabl/preprocessing.py:255: UserWarning: Discarding near-constant features: [2, 13, 15, 16, 18, 24, 27, 28, 29]
near_constant.index[near_constant].tolist()))
best classifier: HistGradientBoostingClassifier(l2_regularization=1e-06, learning_rate=0.1,
loss='auto', max_bins=128, max_depth=12,
max_iter=300, max_leaf_nodes=4,
min_samples_leaf=3, n_iter_no_change=None,
random_state=28019, scoring=None, tol=1e-07,
validation_fraction=0.2, verbose=0)
best score: 0.959
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-2-75c2ea531ac0> in <module>
9 # run clf on the task
10 print('Run clf on the task')
---> 11 run = openml.runs.run_model_on_task(clf, task)
12
13 # print feedbackack
/miniconda3/lib/python3.7/site-packages/openml/runs/functions.py in run_model_on_task(model, task, avoid_duplicate_runs, flow_tags, seed, add_local_measures, upload_flow, return_flow)
104 seed=seed,
105 add_local_measures=add_local_measures,
--> 106 upload_flow=upload_flow,
107 )
108 if return_flow:
/miniconda3/lib/python3.7/site-packages/openml/runs/functions.py in run_flow_on_task(flow, task, avoid_duplicate_runs, flow_tags, seed, add_local_measures, upload_flow)
220 task=task,
221 extension=flow.extension,
--> 222 add_local_measures=add_local_measures,
223 )
224
/miniconda3/lib/python3.7/site-packages/openml/runs/functions.py in _run_task_get_arffcontent(flow, model, task, extension, add_local_measures)
444 rep_no=rep_no,
445 fold_no=fold_no,
--> 446 X_test=test_x,
447 )
448 if trace is not None:
/miniconda3/lib/python3.7/site-packages/openml/extensions/sklearn/extension.py in _run_model_on_fold(self, model, task, X_train, rep_no, fold_no, y_train, X_test)
1393 # it returns the clusters
1394 if isinstance(task, OpenMLSupervisedTask):
-> 1395 pred_y = model_copy.predict(X_test)
1396 elif isinstance(task, OpenMLClusteringTask):
1397 pred_y = model_copy.predict(X_train)
/miniconda3/lib/python3.7/site-packages/sklearn/utils/metaestimators.py in <lambda>(*args, **kwargs)
114
115 # lambda, but not partial, allows help() to work with update_wrapper
--> 116 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
117 # update the docstring of the returned function
118 update_wrapper(out, self.fn)
/miniconda3/lib/python3.7/site-packages/sklearn/pipeline.py in predict(self, X, **predict_params)
420 for _, name, transform in self._iter(with_final=False):
421 Xt = transform.transform(Xt)
--> 422 return self.steps[-1][-1].predict(Xt, **predict_params)
423
424 @if_delegate_has_method(delegate='_final_estimator')
~/Google Drive/STUDY/Columbia/Research/dabl/dabl/models.py in predict(self, X)
300 check_is_fitted(self, 'est_')
301 if getattr(self, 'classes_', None) is not None:
--> 302 return self.classes_[self.est_.predict(X)]
303
304 return self.est_.predict(X)
/miniconda3/lib/python3.7/site-packages/sklearn/utils/metaestimators.py in <lambda>(*args, **kwargs)
114
115 # lambda, but not partial, allows help() to work with update_wrapper
--> 116 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
117 # update the docstring of the returned function
118 update_wrapper(out, self.fn)
/miniconda3/lib/python3.7/site-packages/sklearn/pipeline.py in predict(self, X, **predict_params)
419 Xt = X
420 for _, name, transform in self._iter(with_final=False):
--> 421 Xt = transform.transform(Xt)
422 return self.steps[-1][-1].predict(Xt, **predict_params)
423
~/Google Drive/STUDY/Columbia/Research/dabl/dabl/preprocessing.py in transform(self, X)
550 # Check is fit had been called
551 check_is_fitted(self, ['ct_'])
--> 552 return self.ct_.transform(X)
/miniconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in transform(self, X)
510
511 X = _check_X(X)
--> 512 Xs = self._fit_transform(X, None, _transform_one, fitted=True)
513 self._validate_output(Xs)
514
/miniconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in _fit_transform(self, X, y, func, fitted)
410 message=self._log_message(name, idx, len(transformers)))
411 for idx, (name, trans, column, weight) in enumerate(
--> 412 self._iter(fitted=fitted, replace_strings=True), 1))
413 except ValueError as e:
414 if "Expected 2D array, got 1D array instead" in str(e):
/miniconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable)
919 # remaining jobs.
920 self._iterating = False
--> 921 if self.dispatch_one_batch(iterator):
922 self._iterating = self._original_iterator is not None
923
/miniconda3/lib/python3.7/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
752 tasks = BatchedCalls(itertools.islice(iterator, batch_size),
753 self._backend.get_nested_backend(),
--> 754 self._pickle_cache)
755 if len(tasks) == 0:
756 # No more tasks available in the iterator: tell caller to stop.
/miniconda3/lib/python3.7/site-packages/joblib/parallel.py in __init__(self, iterator_slice, backend_and_jobs, pickle_cache)
208
209 def __init__(self, iterator_slice, backend_and_jobs, pickle_cache=None):
--> 210 self.items = list(iterator_slice)
211 self._size = len(self.items)
212 if isinstance(backend_and_jobs, tuple):
/miniconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in <genexpr>(.0)
409 message_clsname='ColumnTransformer',
410 message=self._log_message(name, idx, len(transformers)))
--> 411 for idx, (name, trans, column, weight) in enumerate(
412 self._iter(fitted=fitted, replace_strings=True), 1))
413 except ValueError as e:
/miniconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in _get_column(X, key)
636 else:
637 # numpy arrays, sparse arrays
--> 638 return X[:, key]
639
640
IndexError: boolean index did not match indexed array along dimension 1; dimension is 36 but corresponding boolean dimension is 27
- After getting rid of the variance threshold, I am getting the following error instead:
task = openml.tasks.get_task(3)
clf = make_pipeline(dabl.models.AnyClassifier(force_exhaust_budget=False))
run = openml.runs.run_model_on_task(clf, task)
/miniconda3/lib/python3.7/site-packages/numpy/lib/arraysetops.py:565: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
mask &= (ar1 != a)
/miniconda3/lib/python3.7/site-packages/numpy/lib/arraysetops.py:569: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
mask |= (ar1 == a)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-11-75c2ea531ac0> in <module>
9 # run clf on the task
10 print('Run clf on the task')
---> 11 run = openml.runs.run_model_on_task(clf, task)
12
13 # print feedbackack
/miniconda3/lib/python3.7/site-packages/openml/runs/functions.py in run_model_on_task(model, task, avoid_duplicate_runs, flow_tags, seed, add_local_measures, upload_flow, return_flow)
104 seed=seed,
105 add_local_measures=add_local_measures,
--> 106 upload_flow=upload_flow,
107 )
108 if return_flow:
/miniconda3/lib/python3.7/site-packages/openml/runs/functions.py in run_flow_on_task(flow, task, avoid_duplicate_runs, flow_tags, seed, add_local_measures, upload_flow)
220 task=task,
221 extension=flow.extension,
--> 222 add_local_measures=add_local_measures,
223 )
224
/miniconda3/lib/python3.7/site-packages/openml/runs/functions.py in _run_task_get_arffcontent(flow, model, task, extension, add_local_measures)
444 rep_no=rep_no,
445 fold_no=fold_no,
--> 446 X_test=test_x,
447 )
448 if trace is not None:
/miniconda3/lib/python3.7/site-packages/openml/extensions/sklearn/extension.py in _run_model_on_fold(self, model, task, X_train, rep_no, fold_no, y_train, X_test)
1393 # it returns the clusters
1394 if isinstance(task, OpenMLSupervisedTask):
-> 1395 pred_y = model_copy.predict(X_test)
1396 elif isinstance(task, OpenMLClusteringTask):
1397 pred_y = model_copy.predict(X_train)
/miniconda3/lib/python3.7/site-packages/sklearn/utils/metaestimators.py in <lambda>(*args, **kwargs)
114
115 # lambda, but not partial, allows help() to work with update_wrapper
--> 116 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
117 # update the docstring of the returned function
118 update_wrapper(out, self.fn)
/miniconda3/lib/python3.7/site-packages/sklearn/pipeline.py in predict(self, X, **predict_params)
420 for _, name, transform in self._iter(with_final=False):
421 Xt = transform.transform(Xt)
--> 422 return self.steps[-1][-1].predict(Xt, **predict_params)
423
424 @if_delegate_has_method(delegate='_final_estimator')
~/Google Drive/STUDY/Columbia/Research/dabl/dabl/models.py in predict(self, X)
300 check_is_fitted(self, 'est_')
301 if getattr(self, 'classes_', None) is not None:
--> 302 return self.classes_[self.est_.predict(X)]
303
304 return self.est_.predict(X)
/miniconda3/lib/python3.7/site-packages/sklearn/utils/metaestimators.py in <lambda>(*args, **kwargs)
114
115 # lambda, but not partial, allows help() to work with update_wrapper
--> 116 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
117 # update the docstring of the returned function
118 update_wrapper(out, self.fn)
/miniconda3/lib/python3.7/site-packages/sklearn/pipeline.py in predict(self, X, **predict_params)
419 Xt = X
420 for _, name, transform in self._iter(with_final=False):
--> 421 Xt = transform.transform(Xt)
422 return self.steps[-1][-1].predict(Xt, **predict_params)
423
~/Google Drive/STUDY/Columbia/Research/dabl/dabl/preprocessing.py in transform(self, X)
550 # Check is fit had been called
551 check_is_fitted(self, ['ct_'])
--> 552 return self.ct_.transform(X)
/miniconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in transform(self, X)
510
511 X = _check_X(X)
--> 512 Xs = self._fit_transform(X, None, _transform_one, fitted=True)
513 self._validate_output(Xs)
514
/miniconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in _fit_transform(self, X, y, func, fitted)
410 message=self._log_message(name, idx, len(transformers)))
411 for idx, (name, trans, column, weight) in enumerate(
--> 412 self._iter(fitted=fitted, replace_strings=True), 1))
413 except ValueError as e:
414 if "Expected 2D array, got 1D array instead" in str(e):
/miniconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable)
919 # remaining jobs.
920 self._iterating = False
--> 921 if self.dispatch_one_batch(iterator):
922 self._iterating = self._original_iterator is not None
923
/miniconda3/lib/python3.7/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
/miniconda3/lib/python3.7/site-packages/joblib/parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
/miniconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
/miniconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
/miniconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
/miniconda3/lib/python3.7/site-packages/joblib/parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
/miniconda3/lib/python3.7/site-packages/sklearn/pipeline.py in _transform_one(transformer, X, y, weight, **fit_params)
693
694 def _transform_one(transformer, X, y, weight, **fit_params):
--> 695 res = transformer.transform(X)
696 # if we have a weight for this transformer, multiply output
697 if weight is None:
/miniconda3/lib/python3.7/site-packages/sklearn/pipeline.py in _transform(self, X)
538 Xt = X
539 for _, _, transform in self._iter():
--> 540 Xt = transform.transform(Xt)
541 return Xt
542
/miniconda3/lib/python3.7/site-packages/sklearn/preprocessing/_encoders.py in transform(self, X)
730 copy=True)
731 else:
--> 732 return self._transform_new(X)
733
734 def inverse_transform(self, X):
/miniconda3/lib/python3.7/site-packages/sklearn/preprocessing/_encoders.py in _transform_new(self, X)
678 """New implementation assuming categorical input"""
679 # validation of X happens in _check_X called by _transform
--> 680 X_int, X_mask = self._transform(X, handle_unknown=self.handle_unknown)
681
682 n_samples, n_features = X_int.shape
/miniconda3/lib/python3.7/site-packages/sklearn/preprocessing/_encoders.py in _transform(self, X, handle_unknown)
135
136 Xi[~valid_mask] = self.categories_[i][0]
--> 137 _, encoded = _encode(Xi, self.categories_[i], encode=True)
138 X_int[:, i] = encoded
139
/miniconda3/lib/python3.7/site-packages/sklearn/preprocessing/label.py in _encode(values, uniques, encode)
108 return res
109 else:
--> 110 return _encode_numpy(values, uniques, encode)
111
112
/miniconda3/lib/python3.7/site-packages/sklearn/preprocessing/label.py in _encode_numpy(values, uniques, encode)
47 if diff:
48 raise ValueError("y contains previously unseen labels: %s"
---> 49 % str(diff))
50 encoded = np.searchsorted(uniques, values)
51 return uniques, encoded
ValueError: y contains previously unseen labels: [0.0]