12
12
import unittest .mock
13
13
import warnings
14
14
from abc import abstractmethod
15
- from typing import Any , Callable , Dict , List , Optional , Union , cast
15
+ from typing import Any , Callable , Dict , List , Optional , Union
16
16
17
17
from ConfigSpace .configuration_space import Configuration , ConfigurationSpace
18
18
34
34
STRING_TO_OUTPUT_TYPES ,
35
35
STRING_TO_TASK_TYPES ,
36
36
)
37
+ from autoPyTorch .data .base_validator import BaseInputValidator
37
38
from autoPyTorch .datasets .base_dataset import BaseDataset
38
39
from autoPyTorch .datasets .resampling_strategy import CrossValTypes , HoldoutValTypes
39
40
from autoPyTorch .ensemble .ensemble_builder import EnsembleBuilderManager
40
- from autoPyTorch .ensemble .ensemble_selection import EnsembleSelection
41
41
from autoPyTorch .ensemble .singlebest_ensemble import SingleBest
42
42
from autoPyTorch .evaluation .abstract_evaluator import fit_and_suppress_warnings
43
43
from autoPyTorch .evaluation .tae import ExecuteTaFuncWithQueue , get_cost_of_crash
@@ -187,7 +187,7 @@ def __init__(
187
187
188
188
self .stop_logging_server = None # type: Optional[multiprocessing.synchronize.Event]
189
189
190
- self ._dask_client = None
190
+ self ._dask_client : Optional [ dask . distributed . Client ] = None
191
191
192
192
self .search_space_updates = search_space_updates
193
193
if search_space_updates is not None :
@@ -196,6 +196,8 @@ def __init__(
196
196
raise ValueError ("Expected search space updates to be of instance"
197
197
" HyperparameterSearchSpaceUpdates got {}" .format (type (self .search_space_updates )))
198
198
199
+ self .InputValidator : Optional [BaseInputValidator ] = None
200
+
199
201
@abstractmethod
200
202
def build_pipeline (self , dataset_properties : Dict [str , Any ]) -> BasePipeline :
201
203
"""
@@ -697,6 +699,7 @@ def _search(
697
699
precision : int = 32 ,
698
700
disable_file_output : List = [],
699
701
load_models : bool = True ,
702
+ dask_client : Optional [dask .distributed .Client ] = None
700
703
) -> 'BaseTask' :
701
704
"""
702
705
Search for the best pipeline configuration for the given dataset.
@@ -828,10 +831,11 @@ def _search(
828
831
# If no dask client was provided, we create one, so that we can
829
832
# start a ensemble process in parallel to smbo optimize
830
833
if (
831
- self . _dask_client is None and (self .ensemble_size > 0 or self . n_jobs is not None and self .n_jobs > 1 )
834
+ dask_client is None and (self .ensemble_size > 0 or self .n_jobs > 1 )
832
835
):
833
836
self ._create_dask_client ()
834
837
else :
838
+ self ._dask_client = dask_client
835
839
self ._is_dask_client_internally_created = False
836
840
837
841
# Handle time resource allocation
@@ -1177,7 +1181,6 @@ def predict(
1177
1181
1178
1182
# Mypy assert
1179
1183
assert self .ensemble_ is not None , "Load models should error out if no ensemble"
1180
- self .ensemble_ = cast (Union [SingleBest , EnsembleSelection ], self .ensemble_ )
1181
1184
1182
1185
if isinstance (self .resampling_strategy , HoldoutValTypes ):
1183
1186
models = self .models_
@@ -1266,15 +1269,17 @@ def get_models_with_weights(self) -> List:
1266
1269
self ._load_models ()
1267
1270
1268
1271
assert self .ensemble_ is not None
1269
- return self .ensemble_ .get_models_with_weights (self .models_ )
1272
+ models_with_weights : List = self .ensemble_ .get_models_with_weights (self .models_ )
1273
+ return models_with_weights
1270
1274
1271
1275
def show_models (self ) -> str :
1272
1276
df = []
1273
1277
for weight , model in self .get_models_with_weights ():
1274
1278
representation = model .get_pipeline_representation ()
1275
1279
representation .update ({'Weight' : weight })
1276
1280
df .append (representation )
1277
- return pd .DataFrame (df ).to_markdown ()
1281
+ models_markdown : str = pd .DataFrame (df ).to_markdown ()
1282
+ return models_markdown
1278
1283
1279
1284
def _print_debug_info_to_log (self ) -> None :
1280
1285
"""
0 commit comments