automl
diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
Lines changed: 315 additions & 202 deletions
diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py
Lines changed: 37 additions & 21 deletions
diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py
Lines changed: 40 additions & 35 deletions
diff --git a/autoPyTorch/constants.py b/autoPyTorch/constants.py
Lines changed: 37 additions & 29 deletions
diff --git a/autoPyTorch/data/base_feature_validator.py b/autoPyTorch/data/base_feature_validator.py
Lines changed: 16 additions & 12 deletions
@@ -19,7 +19,9 @@
 )
 from autoPyTorch.datasets.tabular_dataset import TabularDataset
 from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline
-from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
+from autoPyTorch.utils.hyperparameter_search_space_update import (
+    HyperparameterSearchSpaceUpdates,
+)
 
 
 class TabularClassificationTask(BaseTask):
@@ -57,6 +59,7 @@ class TabularClassificationTask(BaseTask):
             specifies set of components not to use. Incompatible
             with include components
     """
+
     def __init__(
         self,
         seed: int = 1,
@@ -71,10 +74,12 @@ def __init__(
         delete_output_folder_after_terminate: bool = True,
         include_components: Optional[Dict] = None,
         exclude_components: Optional[Dict] = None,
-        resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
+        resampling_strategy: Union[
+            CrossValTypes, HoldoutValTypes
+        ] = HoldoutValTypes.holdout_validation,
         resampling_strategy_args: Optional[Dict[str, Any]] = None,
         backend: Optional[Backend] = None,
-        search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
+        search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None,
     ):
         super().__init__(
             seed=seed,
@@ -96,7 +101,9 @@ def __init__(
             task_type=TASK_TYPES_TO_STRING[TABULAR_CLASSIFICATION],
         )
 
-    def build_pipeline(self, dataset_properties: Dict[str, Any]) -> TabularClassificationPipeline:
+    def build_pipeline(
+        self, dataset_properties: Dict[str, Any]
+    ) -> TabularClassificationPipeline:
         return TabularClassificationPipeline(dataset_properties=dataset_properties)
 
     def search(
@@ -119,7 +126,7 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-    ) -> 'BaseTask':
+    ) -> "BaseTask":
         """
         Search for the best pipeline configuration for the given dataset.
 
@@ -208,11 +215,15 @@ def search(
         # Fit a input validator to check the provided data
         # Also, an encoder is fit to both train and test data,
         # to prevent unseen categories during inference
-        self.InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
+        self.InputValidator.fit(
+            X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test
+        )
 
         self.dataset = TabularDataset(
-            X=X_train, Y=y_train,
-            X_test=X_test, Y_test=y_test,
+            X=X_train,
+            Y=y_train,
+            X_test=X_test,
+            Y_test=y_test,
             validator=self.InputValidator,
             resampling_strategy=self.resampling_strategy,
             resampling_strategy_args=self.resampling_strategy_args,
@@ -236,18 +247,18 @@ def search(
         )
 
     def predict(
-            self,
-            X_test: np.ndarray,
-            batch_size: Optional[int] = None,
-            n_jobs: int = 1
+        self, X_test: np.ndarray, batch_size: Optional[int] = None, n_jobs: int = 1
     ) -> np.ndarray:
         if self.InputValidator is None or not self.InputValidator._is_fitted:
-            raise ValueError("predict() is only supported after calling search. Kindly call first "
-                             "the estimator fit() method.")
+            raise ValueError(
+                "predict() is only supported after calling search. Kindly call first "
+                "the estimator fit() method."
+            )
 
         X_test = self.InputValidator.feature_validator.transform(X_test)
-        predicted_probabilities = super().predict(X_test, batch_size=batch_size,
-                                                  n_jobs=n_jobs)
+        predicted_probabilities = super().predict(
+            X_test, batch_size=batch_size, n_jobs=n_jobs
+        )
 
         if self.InputValidator.target_validator.is_single_column_target():
             predicted_indexes = np.argmax(predicted_probabilities, axis=1)
@@ -258,11 +269,16 @@ def predict(
         # in our encoded values
         return self.InputValidator.target_validator.inverse_transform(predicted_indexes)
 
-    def predict_proba(self,
-                      X_test: Union[np.ndarray, pd.DataFrame, List],
-                      batch_size: Optional[int] = None, n_jobs: int = 1) -> np.ndarray:
+    def predict_proba(
+        self,
+        X_test: Union[np.ndarray, pd.DataFrame, List],
+        batch_size: Optional[int] = None,
+        n_jobs: int = 1,
+    ) -> np.ndarray:
         if self.InputValidator is None or not self.InputValidator._is_fitted:
-            raise ValueError("predict() is only supported after calling search. Kindly call first "
-                             "the estimator fit() method.")
+            raise ValueError(
+                "predict() is only supported after calling search. Kindly call first "
+                "the estimator fit() method."
+            )
         X_test = self.InputValidator.feature_validator.transform(X_test)
         return super().predict(X_test, batch_size=batch_size, n_jobs=n_jobs)
@@ -8,18 +8,17 @@
 
 from autoPyTorch.api.base_task import BaseTask
 from autoPyTorch.automl_common.common.utils.backend import Backend
-from autoPyTorch.constants import (
-    TABULAR_REGRESSION,
-    TASK_TYPES_TO_STRING
-)
+from autoPyTorch.constants import TABULAR_REGRESSION, TASK_TYPES_TO_STRING
 from autoPyTorch.data.tabular_validator import TabularInputValidator
 from autoPyTorch.datasets.resampling_strategy import (
     CrossValTypes,
     HoldoutValTypes,
 )
 from autoPyTorch.datasets.tabular_dataset import TabularDataset
 from autoPyTorch.pipeline.tabular_regression import TabularRegressionPipeline
-from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
+from autoPyTorch.utils.hyperparameter_search_space_update import (
+    HyperparameterSearchSpaceUpdates,
+)
 
 
 class TabularRegressionTask(BaseTask):
@@ -50,23 +49,25 @@ class TabularRegressionTask(BaseTask):
     """
 
     def __init__(
-            self,
-            seed: int = 1,
-            n_jobs: int = 1,
-            logging_config: Optional[Dict] = None,
-            ensemble_size: int = 50,
-            ensemble_nbest: int = 50,
-            max_models_on_disc: int = 50,
-            temporary_directory: Optional[str] = None,
-            output_directory: Optional[str] = None,
-            delete_tmp_folder_after_terminate: bool = True,
-            delete_output_folder_after_terminate: bool = True,
-            include_components: Optional[Dict] = None,
-            exclude_components: Optional[Dict] = None,
-            resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
-            resampling_strategy_args: Optional[Dict[str, Any]] = None,
-            backend: Optional[Backend] = None,
-            search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
+        self,
+        seed: int = 1,
+        n_jobs: int = 1,
+        logging_config: Optional[Dict] = None,
+        ensemble_size: int = 50,
+        ensemble_nbest: int = 50,
+        max_models_on_disc: int = 50,
+        temporary_directory: Optional[str] = None,
+        output_directory: Optional[str] = None,
+        delete_tmp_folder_after_terminate: bool = True,
+        delete_output_folder_after_terminate: bool = True,
+        include_components: Optional[Dict] = None,
+        exclude_components: Optional[Dict] = None,
+        resampling_strategy: Union[
+            CrossValTypes, HoldoutValTypes
+        ] = HoldoutValTypes.holdout_validation,
+        resampling_strategy_args: Optional[Dict[str, Any]] = None,
+        backend: Optional[Backend] = None,
+        search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None,
     ):
         super().__init__(
             seed=seed,
@@ -88,7 +89,9 @@ def __init__(
             task_type=TASK_TYPES_TO_STRING[TABULAR_REGRESSION],
         )
 
-    def build_pipeline(self, dataset_properties: Dict[str, Any]) -> TabularRegressionPipeline:
+    def build_pipeline(
+        self, dataset_properties: Dict[str, Any]
+    ) -> TabularRegressionPipeline:
         return TabularRegressionPipeline(dataset_properties=dataset_properties)
 
     def search(
@@ -111,7 +114,7 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-    ) -> 'BaseTask':
+    ) -> "BaseTask":
         """
         Search for the best pipeline configuration for the given dataset.
 
@@ -196,11 +199,15 @@ def search(
         # Fit a input validator to check the provided data
         # Also, an encoder is fit to both train and test data,
         # to prevent unseen categories during inference
-        self.InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
+        self.InputValidator.fit(
+            X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test
+        )
 
         self.dataset = TabularDataset(
-            X=X_train, Y=y_train,
-            X_test=X_test, Y_test=y_test,
+            X=X_train,
+            Y=y_train,
+            X_test=X_test,
+            Y_test=y_test,
             validator=self.InputValidator,
             resampling_strategy=self.resampling_strategy,
             resampling_strategy_args=self.resampling_strategy_args,
@@ -224,18 +231,16 @@ def search(
         )
 
     def predict(
-            self,
-            X_test: np.ndarray,
-            batch_size: Optional[int] = None,
-            n_jobs: int = 1
+        self, X_test: np.ndarray, batch_size: Optional[int] = None, n_jobs: int = 1
     ) -> np.ndarray:
         if self.InputValidator is None or not self.InputValidator._is_fitted:
-            raise ValueError("predict() is only supported after calling search. Kindly call first "
-                             "the estimator fit() method.")
+            raise ValueError(
+                "predict() is only supported after calling search. Kindly call first "
+                "the estimator fit() method."
+            )
 
         X_test = self.InputValidator.feature_validator.transform(X_test)
-        predicted_values = super().predict(X_test, batch_size=batch_size,
-                                           n_jobs=n_jobs)
+        predicted_values = super().predict(X_test, batch_size=batch_size, n_jobs=n_jobs)
 
         # Allow to predict in the original domain -- that is, the user is not interested
         # in our encoded values
 
@@ -6,27 +6,33 @@
 TIMESERIES_REGRESSION = 6
 
 REGRESSION_TASKS = [TABULAR_REGRESSION, IMAGE_REGRESSION, TIMESERIES_REGRESSION]
-CLASSIFICATION_TASKS = [TABULAR_CLASSIFICATION, IMAGE_CLASSIFICATION, TIMESERIES_CLASSIFICATION]
+CLASSIFICATION_TASKS = [
+    TABULAR_CLASSIFICATION,
+    IMAGE_CLASSIFICATION,
+    TIMESERIES_CLASSIFICATION,
+]
 
 TABULAR_TASKS = [TABULAR_CLASSIFICATION, TABULAR_REGRESSION]
 IMAGE_TASKS = [IMAGE_CLASSIFICATION, IMAGE_REGRESSION]
 TASK_TYPES = REGRESSION_TASKS + CLASSIFICATION_TASKS
 
-TASK_TYPES_TO_STRING = \
-    {TABULAR_CLASSIFICATION: 'tabular_classification',
-     IMAGE_CLASSIFICATION: 'image_classification',
-     TABULAR_REGRESSION: 'tabular_regression',
-     IMAGE_REGRESSION: 'image_regression',
-     TIMESERIES_CLASSIFICATION: 'time_series_classification',
-     TIMESERIES_REGRESSION: 'time_series_regression'}
-
-STRING_TO_TASK_TYPES = \
-    {'tabular_classification': TABULAR_CLASSIFICATION,
-     'image_classification': IMAGE_CLASSIFICATION,
-     'tabular_regression': TABULAR_REGRESSION,
-     'image_regression': IMAGE_REGRESSION,
-     'time_series_classification': TIMESERIES_CLASSIFICATION,
-     'time_series_regression': TIMESERIES_REGRESSION}
+TASK_TYPES_TO_STRING = {
+    TABULAR_CLASSIFICATION: "tabular_classification",
+    IMAGE_CLASSIFICATION: "image_classification",
+    TABULAR_REGRESSION: "tabular_regression",
+    IMAGE_REGRESSION: "image_regression",
+    TIMESERIES_CLASSIFICATION: "time_series_classification",
+    TIMESERIES_REGRESSION: "time_series_regression",
+}
+
+STRING_TO_TASK_TYPES = {
+    "tabular_classification": TABULAR_CLASSIFICATION,
+    "image_classification": IMAGE_CLASSIFICATION,
+    "tabular_regression": TABULAR_REGRESSION,
+    "image_regression": IMAGE_REGRESSION,
+    "time_series_classification": TIMESERIES_CLASSIFICATION,
+    "time_series_regression": TIMESERIES_REGRESSION,
+}
 
 # Output types have been defined as in scikit-learn type_of_target
 # (https://scikit-learn.org/stable/modules/generated/sklearn.utils.multiclass.type_of_target.html)
@@ -38,19 +44,21 @@
 
 OUTPUT_TYPES = [BINARY, CONTINUOUSMULTIOUTPUT, MULTICLASS, CONTINUOUS]
 
-OUTPUT_TYPES_TO_STRING = \
-    {BINARY: 'binary',
-     CONTINUOUSMULTIOUTPUT: 'continuous-multioutput',
-     MULTICLASS: 'multiclass',
-     CONTINUOUS: 'continuous',
-     MULTICLASSMULTIOUTPUT: 'multiclass-multioutput'}
-
-STRING_TO_OUTPUT_TYPES = \
-    {'binary': BINARY,
-     'continuous-multioutput': CONTINUOUSMULTIOUTPUT,
-     'multiclass': MULTICLASS,
-     'continuous': CONTINUOUS,
-     'multiclass-multioutput': MULTICLASSMULTIOUTPUT}
+OUTPUT_TYPES_TO_STRING = {
+    BINARY: "binary",
+    CONTINUOUSMULTIOUTPUT: "continuous-multioutput",
+    MULTICLASS: "multiclass",
+    CONTINUOUS: "continuous",
+    MULTICLASSMULTIOUTPUT: "multiclass-multioutput",
+}
+
+STRING_TO_OUTPUT_TYPES = {
+    "binary": BINARY,
+    "continuous-multioutput": CONTINUOUSMULTIOUTPUT,
+    "multiclass": MULTICLASS,
+    "continuous": CONTINUOUS,
+    "multiclass-multioutput": MULTICLASSMULTIOUTPUT,
+}
 
 CLASSIFICATION_OUTPUTS = [BINARY, MULTICLASS, MULTICLASSMULTIOUTPUT]
 REGRESSION_OUTPUTS = [CONTINUOUS, CONTINUOUSMULTIOUTPUT]
@@ -41,10 +41,13 @@ class BaseFeatureValidator(BaseEstimator):
         enc_columns (typing.List[str])
             List of columns that were encoded.
     """
-    def __init__(self,
-                 logger: typing.Optional[typing.Union[PicklableClientLogger, logging.Logger
-                                                      ]] = None,
-                 ) -> None:
+
+    def __init__(
+        self,
+        logger: typing.Optional[
+            typing.Union[PicklableClientLogger, logging.Logger]
+        ] = None,
+    ) -> None:
         # Register types to detect unsupported data format changes
         self.feat_type = None  # type: typing.Optional[typing.List[str]]
         self.data_type = None  # type: typing.Optional[type]
@@ -54,9 +57,9 @@ def __init__(self,
         self.encoder = None  # type: typing.Optional[BaseEstimator]
         self.enc_columns = []  # type: typing.List[str]
 
-        self.logger: typing.Union[
-            PicklableClientLogger, logging.Logger
-        ] = logger if logger is not None else logging.getLogger(__name__)
+        self.logger: typing.Union[PicklableClientLogger, logging.Logger] = (
+            logger if logger is not None else logging.getLogger(__name__)
+        )
 
         # Required for dataset properties
         self.num_features = None  # type: typing.Optional[int]
@@ -94,11 +97,12 @@ def fit(
             self._check_data(X_test)
 
             if np.shape(X_train)[1] != np.shape(X_test)[1]:
-                raise ValueError("The feature dimensionality of the train and test "
-                                 "data does not match train({}) != test({})".format(
-                                     np.shape(X_train)[1],
-                                     np.shape(X_test)[1]
-                                 ))
+                raise ValueError(
+                    "The feature dimensionality of the train and test "
+                    "data does not match train({}) != test({})".format(
+                        np.shape(X_train)[1], np.shape(X_test)[1]
+                    )
+                )
 
         # Fit on the training data
         self._fit(X_train)