automl
diff --git a/‎.github/workflows/long_regression_test.yml‎
Lines changed: 35 additions & 0 deletions b/‎.github/workflows/long_regression_test.yml‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎.github/workflows/pytest.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/pytest.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 10 additions & 6 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 10 additions & 6 deletions
diff --git a/‎MANIFEST.in‎
Lines changed: 8 additions & 7 deletions b/‎MANIFEST.in‎
Lines changed: 8 additions & 7 deletions
diff --git a/‎README.md‎
Lines changed: 1 addition & 5 deletions b/‎README.md‎
Lines changed: 1 addition & 5 deletions
diff --git a/‎autoPyTorch/api/base_task.py‎
Lines changed: 27 additions & 25 deletions b/‎autoPyTorch/api/base_task.py‎
Lines changed: 27 additions & 25 deletions
diff --git a/‎autoPyTorch/api/tabular_regression.py‎
Lines changed: 7 additions & 3 deletions b/‎autoPyTorch/api/tabular_regression.py‎
Lines changed: 7 additions & 3 deletions
diff --git a/‎autoPyTorch/data/base_target_validator.py‎
Lines changed: 0 additions & 1 deletion b/‎autoPyTorch/data/base_target_validator.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎autoPyTorch/data/tabular_feature_validator.py‎
Lines changed: 0 additions & 1 deletion b/‎autoPyTorch/data/tabular_feature_validator.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎autoPyTorch/data/tabular_target_validator.py‎
Lines changed: 3 additions & 2 deletions b/‎autoPyTorch/data/tabular_target_validator.py‎
Lines changed: 3 additions & 2 deletions
@@ -0,0 +1,35 @@
+name: Tests
+
+on:
+  schedule:
+    # Every Tuesday at 7AM UTC
+    # TODO temporarily set to every day just for the PR
+    #- cron: '0 07 * * 2'
+    - cron: '0 07 * * *'
+
+
+jobs:
+  ubuntu:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [3.8]
+      fail-fast:  false
+
+    steps:
+    - uses: actions/checkout@v2
+      with:
+        ref: development
+    - name: Setup Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install test dependencies
+      run: |
+        git submodule update --init --recursive
+        python -m pip install --upgrade pip
+        pip install -e .[test]
+    - name: Run tests
+      run: |
+        python -m pytest --durations=200 cicd/test_preselected_configs.py -vs
@@ -8,7 +8,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.6, 3.7, 3.8]
+        python-version: [3.7, 3.8]
         include:
           - python-version: 3.8
             code-cov: true
@@ -52,4 +52,4 @@ jobs:
       uses: codecov/codecov-action@v1
       with:
         fail_ci_if_error: true
-        verbose: true
+        verbose: true
@@ -3,21 +3,25 @@ repos:
     rev: v0.761
     hooks:
       - id: mypy
-        args: [--show-error-codes]
-        name: mypy AutoPyTorch
+        args: [--show-error-codes,
+               --warn-redundant-casts,
+               --warn-return-any,
+               --warn-unreachable,
+        ]
         files: autoPyTorch/.*
+        exclude: autoPyTorch/ensemble/
   - repo: https://gitlab.com/pycqa/flake8
     rev: 3.8.3
     hooks:
       - id: flake8
-        name: flake8 AutoPyTorch
-        files: autoPyTorch/.*
         additional_dependencies:
           - flake8-print==3.1.4
           - flake8-import-order
+        name: flake8 autoPyTorch
+        files: autoPyTorch/.*
       - id: flake8
-        name: flake8 tests
-        files: test/.*
         additional_dependencies:
           - flake8-print==3.1.4
           - flake8-import-order
+        name: flake8 test
+        files: test/.*
@@ -1,10 +1,11 @@
 include requirements.txt
 include autoPyTorch/utils/logging.yaml
 include autoPyTorch/configs/default_pipeline_options.json
-include autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/catboost.json
-include autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/rotation_forest.json
-include autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/random_forest.json
-include autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/knn.json
-include autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/svm.json
-include autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/extra_trees.json
-include autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/lgb.json
+include autoPyTorch/configs/greedy_portfolio.json
+include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/catboost.json
+include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/rotation_forest.json
+include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/random_forest.json
+include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/knn.json
+include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/svm.json
+include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/extra_trees.json
+include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/lgb.json
@@ -27,11 +27,7 @@ git submodule update --init --recursive
 # Create the environment
 conda create -n autopytorch python=3.8
 conda activate autopytorch
-For Linux:
-    conda install gxx_linux-64 gcc_linux-64 swig
-For mac:
-    conda install -c conda-forge clang_osx-64 clangxx_osx-64
-    conda install -c anaconda swig
+conda install swig
 cat requirements.txt | xargs -n 1 -L 1 pip install
 python setup.py install
 
 
@@ -12,11 +12,12 @@
 import unittest.mock
 import warnings
 from abc import abstractmethod
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 from ConfigSpace.configuration_space import Configuration, ConfigurationSpace
 
 import dask
+import dask.distributed
 
 import joblib
 
@@ -38,13 +39,12 @@
 from autoPyTorch.datasets.base_dataset import BaseDataset
 from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes
 from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager
-from autoPyTorch.ensemble.ensemble_selection import EnsembleSelection
 from autoPyTorch.ensemble.singlebest_ensemble import SingleBest
 from autoPyTorch.evaluation.abstract_evaluator import fit_and_suppress_warnings
 from autoPyTorch.evaluation.tae import ExecuteTaFuncWithQueue, get_cost_of_crash
 from autoPyTorch.optimizer.smbo import AutoMLSMBO
 from autoPyTorch.pipeline.base_pipeline import BasePipeline
-from autoPyTorch.pipeline.components.setup.traditional_ml.classifier_models import get_available_classifiers
+from autoPyTorch.pipeline.components.setup.traditional_ml.traditional_learner import get_available_traditional_learners
 from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric
 from autoPyTorch.pipeline.components.training.metrics.utils import calculate_score, get_metrics
 from autoPyTorch.utils.common import FitRequirement, replace_string_bool_to_bool
@@ -198,7 +198,7 @@ def __init__(
         # examples. Nevertheless, multi-process runs
         # have spawn as requirement to reduce the
         # possibility of a deadlock
-        self._dask_client = None
+        self._dask_client: Optional[dask.distributed.Client] = None
         self._multiprocessing_context = 'forkserver'
         if self.n_jobs == 1:
             self._multiprocessing_context = 'fork'
@@ -590,7 +590,7 @@ def _do_traditional_prediction(self, time_left: int, func_eval_time_limit_secs:
         memory_limit = self._memory_limit
         if memory_limit is not None:
             memory_limit = int(math.ceil(memory_limit))
-        available_classifiers = get_available_classifiers()
+        available_classifiers = get_available_traditional_learners()
         dask_futures = []
 
         total_number_classifiers = len(available_classifiers)
@@ -711,7 +711,8 @@ def _search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-        portfolio_selection: Optional[str] = None
+        portfolio_selection: Optional[str] = None,
+        dask_client: Optional[dask.distributed.Client] = None
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -838,6 +839,8 @@ def _search(
         self._metric = get_metrics(
             names=[optimize_metric], dataset_properties=dataset_properties)[0]
 
+        self.pipeline_options['optimize_metric'] = optimize_metric
+
         self.search_space = self.get_search_space(dataset)
 
         budget_config: Dict[str, Union[float, str]] = {}
@@ -855,10 +858,11 @@ def _search(
         # If no dask client was provided, we create one, so that we can
         # start an ensemble process in parallel to smbo optimize
         if (
-            self._dask_client is None and (self.ensemble_size > 0 or self.n_jobs is not None and self.n_jobs > 1)
+            dask_client is None and (self.ensemble_size > 0 or self.n_jobs > 1)
         ):
             self._create_dask_client()
         else:
+            self._dask_client = dask_client
             self._is_dask_client_internally_created = False
 
         # Handle time resource allocation
@@ -892,21 +896,18 @@ def _search(
         # ============> Run traditional ml
 
         if enable_traditional_pipeline:
-            if STRING_TO_TASK_TYPES[self.task_type] in REGRESSION_TASKS:
-                self._logger.warning("Traditional Pipeline is not enabled for regression. Skipping...")
-            else:
-                traditional_task_name = 'runTraditional'
-                self._stopwatch.start_task(traditional_task_name)
-                elapsed_time = self._stopwatch.wall_elapsed(self.dataset_name)
-                # We want time for at least 1 Neural network in SMAC
-                time_for_traditional = int(
-                    self._time_for_task - elapsed_time - func_eval_time_limit_secs
-                )
-                self._do_traditional_prediction(
-                    func_eval_time_limit_secs=func_eval_time_limit_secs,
-                    time_left=time_for_traditional,
-                )
-                self._stopwatch.stop_task(traditional_task_name)
+            traditional_task_name = 'runTraditional'
+            self._stopwatch.start_task(traditional_task_name)
+            elapsed_time = self._stopwatch.wall_elapsed(self.dataset_name)
+            # We want time for at least 1 Neural network in SMAC
+            time_for_traditional = int(
+                self._time_for_task - elapsed_time - func_eval_time_limit_secs
+            )
+            self._do_traditional_prediction(
+                func_eval_time_limit_secs=func_eval_time_limit_secs,
+                time_left=time_for_traditional,
+            )
+            self._stopwatch.stop_task(traditional_task_name)
 
         # ============> Starting ensemble
         elapsed_time = self._stopwatch.wall_elapsed(self.dataset_name)
@@ -1207,7 +1208,6 @@ def predict(
 
         # Mypy assert
         assert self.ensemble_ is not None, "Load models should error out if no ensemble"
-        self.ensemble_ = cast(Union[SingleBest, EnsembleSelection], self.ensemble_)
 
         if isinstance(self.resampling_strategy, HoldoutValTypes):
             models = self.models_
@@ -1316,15 +1316,17 @@ def get_models_with_weights(self) -> List:
             self._load_models()
 
         assert self.ensemble_ is not None
-        return self.ensemble_.get_models_with_weights(self.models_)
+        models_with_weights: List[Tuple[float, BasePipeline]] = self.ensemble_.get_models_with_weights(self.models_)
+        return models_with_weights
 
     def show_models(self) -> str:
         df = []
         for weight, model in self.get_models_with_weights():
             representation = model.get_pipeline_representation()
             representation.update({'Weight': weight})
             df.append(representation)
-        return pd.DataFrame(df).to_markdown()
+        models_markdown: str = pd.DataFrame(df).to_markdown()
+        return models_markdown
 
     def _print_debug_info_to_log(self) -> None:
         """
 
@@ -106,7 +106,7 @@ def search(
         budget: Optional[float] = None,
         total_walltime_limit: int = 100,
         func_eval_time_limit_secs: Optional[int] = None,
-        enable_traditional_pipeline: bool = False,
+        enable_traditional_pipeline: bool = True,
         memory_limit: Optional[int] = 4096,
         smac_scenario_args: Optional[Dict[str, Any]] = None,
         get_smac_object_callback: Optional[Callable] = None,
@@ -151,7 +151,7 @@ def search(
                 total_walltime_limit // 2 to allow enough time to fit
                 at least 2 individual machine learning algorithms.
                 Set to np.inf in case no time limit is desired.
-            enable_traditional_pipeline (bool), (default=False):
+            enable_traditional_pipeline (bool), (default=True):
                 Whether to fit the traditional machine learning models
                 in addition to the neural network search.
             memory_limit (Optional[int]), (default=4096): Memory
@@ -187,7 +187,11 @@ def search(
                 configurations, similar to
                 `autoPyTorch/configs/greedy_portfolio.json`.
                 Additionally, the keyword 'greedy' is supported,
                 which would use the default portfolio from
-                `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
+                `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`.
+                Although portfolio selection is supported for tabular
+                regression, the portfolio has been built using
+                classification datasets. A portfolio covering tabular
+                regression datasets will be added in a future update.
 
         Returns:
             self
 
@@ -95,7 +95,6 @@ def fit(
                                      np.shape(y_test)
                                  ))
             if isinstance(y_train, pd.DataFrame):
-                y_train = typing.cast(pd.DataFrame, y_train)
                 y_test = typing.cast(pd.DataFrame, y_test)
                 if y_train.columns.tolist() != y_test.columns.tolist():
                     raise ValueError(
 
@@ -145,7 +145,6 @@ def transform(
             X = self.numpy_array_to_pandas(X)
 
         if hasattr(X, "iloc") and not scipy.sparse.issparse(X):
-            X = typing.cast(pd.DataFrame, X)
             if np.any(pd.isnull(X)):
                 for column in X.columns:
                     if X[column].isna().all():
 
@@ -194,8 +194,9 @@ def _check_data(
                 A set of features whose dimensionality and data type is going to be checked
         """
 
-        if not isinstance(
-                y, (np.ndarray, pd.DataFrame, list, pd.Series)) and not scipy.sparse.issparse(y):
+        if not isinstance(y, (np.ndarray, pd.DataFrame,
+                              typing.List, pd.Series)) \
+                and not scipy.sparse.issparse(y):  # type: ignore[misc]
             raise ValueError("AutoPyTorch only supports Numpy arrays, Pandas DataFrames,"
                              " pd.Series, sparse data and Python Lists as targets, yet, "
                              "the provided input is of type {}".format(