Commit f1e837d

Merge branch 'development' into add-col_tfr
2 parents 7288128 + 9002937

32 files changed (+709, -273 lines)

.github/workflows/pytest.yml

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.7, 3.8]
+        python-version: [3.7, 3.8, 3.9]
         include:
           - python-version: 3.8
             code-cov: true

autoPyTorch/api/base_task.py

Lines changed: 89 additions & 66 deletions
Large diffs are not rendered by default.

autoPyTorch/api/tabular_classification.py

Lines changed: 36 additions & 11 deletions
@@ -110,8 +110,9 @@ def search(
         X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
         y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
         dataset_name: Optional[str] = None,
-        budget_type: Optional[str] = None,
-        budget: Optional[float] = None,
+        budget_type: str = 'epochs',
+        min_budget: int = 5,
+        max_budget: int = 50,
         total_walltime_limit: int = 100,
         func_eval_time_limit_secs: Optional[int] = None,
         enable_traditional_pipeline: bool = True,
@@ -137,15 +138,38 @@ def search(
                 be provided to track the generalization performance of each stage.
             optimize_metric (str):
                 name of the metric that is used to evaluate a pipeline.
-            budget_type (Optional[str]):
+            budget_type (str):
                 Type of budget to be used when fitting the pipeline.
-                Either 'epochs' or 'runtime'. If not provided, uses
-                the default in the pipeline config ('epochs')
-            budget (Optional[float]):
-                Budget to fit a single run of the pipeline. If not
-                provided, uses the default in the pipeline config
-            total_walltime_limit (int), (default=100):
-                Time limit in seconds for the search of appropriate models.
+                It can be one of:
+                + 'epochs': The training of each pipeline will be terminated after
+                    a number of epochs have passed. This number of epochs is determined by the
+                    budget assigned to the run (between min_budget and max_budget).
+                + 'runtime': The training of each pipeline will be terminated after
+                    a number of seconds have passed. This number of seconds is determined by the
+                    budget assigned to the run. The overall fitting time of a pipeline is
+                    controlled by func_eval_time_limit_secs. 'runtime' only controls the time
+                    allocated to train a pipeline; it does not consider the overall time it takes
+                    to create a pipeline (data loading and preprocessing, other i/o operations, etc.).
+                budget_type determines the units of min_budget/max_budget: if budget_type=='epochs',
+                min_budget refers to epochs; if budget_type=='runtime', it refers to seconds.
+            min_budget (int):
+                Auto-PyTorch uses `Hyperband <https://arxiv.org/abs/1603.06560>`_ to
+                trade off resources between running many pipelines at min_budget and
+                running the top-performing pipelines at max_budget.
+                min_budget states the minimum resource allocation a pipeline should have
+                so that we can compare and quickly discard badly performing models.
+                For example, if budget_type is epochs and min_budget=5, every pipeline
+                runs for at least 5 epochs before its performance is compared.
+            max_budget (int):
+                Auto-PyTorch uses `Hyperband <https://arxiv.org/abs/1603.06560>`_ to
+                trade off resources between running many pipelines at min_budget and
+                running the top-performing pipelines at max_budget.
+                max_budget states the maximum resource allocation a pipeline will receive.
+                For example, if budget_type is epochs and max_budget=50,
+                training of a pipeline is terminated after 50 epochs.
+            total_walltime_limit (int), (default=100): Time limit
+                in seconds for the search of appropriate models.
                 By increasing this value, autopytorch has a higher
                 chance of finding better models.
             func_eval_time_limit_secs (int), (default=None):
@@ -234,7 +258,8 @@ def search(
             dataset=self.dataset,
             optimize_metric=optimize_metric,
             budget_type=budget_type,
-            budget=budget,
+            min_budget=min_budget,
+            max_budget=max_budget,
             total_walltime_limit=total_walltime_limit,
             func_eval_time_limit_secs=func_eval_time_limit_secs,
             enable_traditional_pipeline=enable_traditional_pipeline,
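
With this change, a call to the classification API looks like the sketch below. This is an illustrative example, not code from this commit: the dataset variables and the 'accuracy' metric name are assumptions.

from autoPyTorch.api.tabular_classification import TabularClassificationTask

# X_train, y_train, X_test, y_test are assumed to be prepared beforehand.
api = TabularClassificationTask()
api.search(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    optimize_metric='accuracy',  # assumed metric name
    budget_type='epochs',        # min/max budgets below are measured in epochs
    min_budget=5,                # smallest Hyperband allocation per pipeline
    max_budget=50,               # cap for the top-performing pipelines
    total_walltime_limit=300,
)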

autoPyTorch/api/tabular_regression.py

Lines changed: 51 additions & 26 deletions
@@ -53,23 +53,23 @@ class TabularRegressionTask(BaseTask):
     """

     def __init__(
-            self,
-            seed: int = 1,
-            n_jobs: int = 1,
-            logging_config: Optional[Dict] = None,
-            ensemble_size: int = 50,
-            ensemble_nbest: int = 50,
-            max_models_on_disc: int = 50,
-            temporary_directory: Optional[str] = None,
-            output_directory: Optional[str] = None,
-            delete_tmp_folder_after_terminate: bool = True,
-            delete_output_folder_after_terminate: bool = True,
-            include_components: Optional[Dict] = None,
-            exclude_components: Optional[Dict] = None,
-            resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
-            resampling_strategy_args: Optional[Dict[str, Any]] = None,
-            backend: Optional[Backend] = None,
-            search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
+        self,
+        seed: int = 1,
+        n_jobs: int = 1,
+        logging_config: Optional[Dict] = None,
+        ensemble_size: int = 50,
+        ensemble_nbest: int = 50,
+        max_models_on_disc: int = 50,
+        temporary_directory: Optional[str] = None,
+        output_directory: Optional[str] = None,
+        delete_tmp_folder_after_terminate: bool = True,
+        delete_output_folder_after_terminate: bool = True,
+        include_components: Optional[Dict] = None,
+        exclude_components: Optional[Dict] = None,
+        resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
+        resampling_strategy_args: Optional[Dict[str, Any]] = None,
+        backend: Optional[Backend] = None,
+        search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
     ):
         super().__init__(
             seed=seed,
@@ -102,8 +102,9 @@ def search(
         X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
         y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
         dataset_name: Optional[str] = None,
-        budget_type: Optional[str] = None,
-        budget: Optional[float] = None,
+        budget_type: str = 'epochs',
+        min_budget: int = 5,
+        max_budget: int = 50,
         total_walltime_limit: int = 100,
         func_eval_time_limit_secs: Optional[int] = None,
         enable_traditional_pipeline: bool = True,
@@ -129,13 +130,36 @@ def search(
                 be provided to track the generalization performance of each stage.
             optimize_metric (str): name of the metric that is used to
                 evaluate a pipeline.
-            budget_type (Optional[str]):
+            budget_type (str):
                 Type of budget to be used when fitting the pipeline.
-                Either 'epochs' or 'runtime'. If not provided, uses
-                the default in the pipeline config ('epochs')
-            budget (Optional[float]):
-                Budget to fit a single run of the pipeline. If not
-                provided, uses the default in the pipeline config
+                It can be one of:
+                + 'epochs': The training of each pipeline will be terminated after
+                    a number of epochs have passed. This number of epochs is determined by the
+                    budget assigned to the run (between min_budget and max_budget).
+                + 'runtime': The training of each pipeline will be terminated after
+                    a number of seconds have passed. This number of seconds is determined by the
+                    budget assigned to the run. The overall fitting time of a pipeline is
+                    controlled by func_eval_time_limit_secs. 'runtime' only controls the time
+                    allocated to train a pipeline; it does not consider the overall time it takes
+                    to create a pipeline (data loading and preprocessing, other i/o operations, etc.).
+                budget_type determines the units of min_budget/max_budget: if budget_type=='epochs',
+                min_budget refers to epochs; if budget_type=='runtime', it refers to seconds.
+            min_budget (int):
+                Auto-PyTorch uses `Hyperband <https://arxiv.org/abs/1603.06560>`_ to
+                trade off resources between running many pipelines at min_budget and
+                running the top-performing pipelines at max_budget.
+                min_budget states the minimum resource allocation a pipeline should have
+                so that we can compare and quickly discard badly performing models.
+                For example, if budget_type is epochs and min_budget=5, every pipeline
+                runs for at least 5 epochs before its performance is compared.
+            max_budget (int):
+                Auto-PyTorch uses `Hyperband <https://arxiv.org/abs/1603.06560>`_ to
+                trade off resources between running many pipelines at min_budget and
+                running the top-performing pipelines at max_budget.
+                max_budget states the maximum resource allocation a pipeline will receive.
+                For example, if budget_type is epochs and max_budget=50,
+                training of a pipeline is terminated after 50 epochs.
             total_walltime_limit (int), (default=100): Time limit
                 in seconds for the search of appropriate models.
                 By increasing this value, autopytorch has a higher
@@ -227,7 +251,8 @@ def search(
             dataset=self.dataset,
             optimize_metric=optimize_metric,
             budget_type=budget_type,
-            budget=budget,
+            min_budget=min_budget,
+            max_budget=max_budget,
             total_walltime_limit=total_walltime_limit,
             func_eval_time_limit_secs=func_eval_time_limit_secs,
             enable_traditional_pipeline=enable_traditional_pipeline,
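
As a rough illustration of how Hyperband uses these two arguments, the sketch below computes the intermediate budgets between min_budget and max_budget. The halving factor eta=3 is an assumption (SMAC's default), not something configured in this commit.

# Illustrative sketch: successive-halving budgets between min_budget and
# max_budget, assuming a halving factor eta=3.
def hyperband_budgets(min_budget: float = 5, max_budget: float = 50, eta: float = 3) -> list:
    budgets = [float(max_budget)]
    while budgets[-1] / eta >= min_budget:
        budgets.append(budgets[-1] / eta)
    return sorted(budgets)

# With the new defaults this yields roughly [5.6, 16.7, 50.0] epochs: every
# pipeline gets at least the lowest budget, and only the best survivors of
# each comparison advance toward max_budget.
print(hyperband_budgets())
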
Lines changed: 8 additions & 9 deletions
@@ -1,11 +1,10 @@
 {
-  "device": "cpu",
-  "budget_type": "epochs",
-  "min_epochs": 5,
-  "epochs": 50,
-  "runtime": 3600,
-  "torch_num_threads": 1,
-  "early_stopping": 20,
-  "use_tensorboard_logger": "False",
-  "metrics_during_training": "True"
+    "device": "cpu",
+    "budget_type": "epochs",
+    "epochs": 50,
+    "runtime": 3600,
+    "torch_num_threads": 1,
+    "early_stopping": 20,
+    "use_tensorboard_logger": "False",
+    "metrics_during_training": "True"
 }
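
Note that the booleans in this config are stored as strings. Here is a minimal sketch of loading and normalising such a file with the replace_string_bool_to_bool helper imported in tae.py below; the file path is an assumption for illustration.

import json

from autoPyTorch.utils.common import replace_string_bool_to_bool

# Load the defaults and turn the "True"/"False" strings into real booleans.
with open('default_pipeline_options.json') as f:  # path assumed
    pipeline_config = replace_string_bool_to_bool(json.load(f))

assert pipeline_config['use_tensorboard_logger'] is False
assert pipeline_config['epochs'] == 50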

autoPyTorch/evaluation/abstract_evaluator.py

Lines changed: 8 additions & 2 deletions
@@ -42,7 +42,7 @@
     calculate_loss,
     get_metrics,
 )
-from autoPyTorch.utils.common import subsampler
+from autoPyTorch.utils.common import dict_repr, subsampler
 from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
 from autoPyTorch.utils.logging_ import PicklableClientLogger, get_named_client_logger
 from autoPyTorch.utils.pipeline import get_dataset_requirements
@@ -515,6 +515,12 @@ def __init__(self, backend: Backend,
         # If the budget is epochs, we want to limit that in the fit dictionary
         if self.budget_type == 'epochs':
             self.fit_dictionary['epochs'] = budget
+            self.fit_dictionary.pop('runtime', None)
+        elif self.budget_type == 'runtime':
+            self.fit_dictionary['runtime'] = budget
+            self.fit_dictionary.pop('epochs', None)
+        else:
+            raise ValueError(f"Unsupported budget type {self.budget_type} provided")

         self.num_run = 0 if num_run is None else num_run

@@ -531,7 +537,7 @@ def __init__(self, backend: Backend,
         self.Y_actual_train: Optional[np.ndarray] = None
         self.pipelines: Optional[List[BaseEstimator]] = None
         self.pipeline: Optional[BaseEstimator] = None
-        self.logger.debug("Fit dictionary in Abstract evaluator: {}".format(self.fit_dictionary))
+        self.logger.debug("Fit dictionary in Abstract evaluator: {}".format(dict_repr(self.fit_dictionary)))
         self.logger.debug("Search space updates :{}".format(self.search_space_updates))

     def _get_pipeline(self) -> BaseEstimator:
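
The new dict_repr helper itself is not shown in this diff. Below is a plausible minimal version, assuming it only pretty-prints one key per line; the actual implementation in autoPyTorch.utils.common may differ.

from typing import Any, Dict, Optional

def dict_repr(d: Optional[Dict[Any, Any]]) -> str:
    # One "key: value" pair per line keeps large fit dictionaries readable
    # in debug logs; None is rendered as the string "None".
    if d is None:
        return "None"
    return "\n".join(f"{k}: {v}" for k, v in d.items())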

autoPyTorch/evaluation/tae.py

Lines changed: 18 additions & 6 deletions
@@ -26,7 +26,7 @@
 from autoPyTorch.automl_common.common.utils.backend import Backend
 from autoPyTorch.evaluation.utils import empty_queue, extract_learning_curve, read_queue
 from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric
-from autoPyTorch.utils.common import replace_string_bool_to_bool
+from autoPyTorch.utils.common import dict_repr, replace_string_bool_to_bool
 from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
 from autoPyTorch.utils.logging_ import PicklableClientLogger, get_named_client_logger
 from autoPyTorch.utils.parallel import preload_modules
@@ -209,9 +209,14 @@ def run_wrapper(
             )
         else:
             if run_info.budget == 0:
-                run_info = run_info._replace(budget=self.pipeline_config[self.budget_type])
-            elif run_info.budget <= 0 or run_info.budget > 100:
-                raise ValueError('Illegal value for budget, must be >0 and <=100, but is %f' %
+                # SMAC can return budget zero for intensifiers that don't have a concept
+                # of budget, for example a simple Bayesian optimization intensifier.
+                # Budget determines how our pipeline trains, which can be via runtime or epochs
+                epochs_budget = self.pipeline_config.get('epochs', np.inf)
+                runtime_budget = self.pipeline_config.get('runtime', np.inf)
+                run_info = run_info._replace(budget=min(epochs_budget, runtime_budget))
+            elif run_info.budget <= 0:
+                raise ValueError('Illegal value for budget, must be greater than zero but is %f' %
                                  run_info.budget)
             if self.budget_type not in ('epochs', 'runtime'):
                 raise ValueError("Illegal value for budget type, must be one of "
@@ -454,7 +459,14 @@ def run(

         empty_queue(queue)
         self.logger.debug(
-            'Finished function evaluation %s. Status: %s, Cost: %f, Runtime: %f, Additional %s',
-            str(num_run), status, cost, runtime, additional_run_info,
+            "Finished function evaluation {}.\n"
+            "Status: {}, Cost: {}, Runtime: {},\n"
+            "Additional information:\n{}".format(
+                str(num_run),
+                status,
+                cost,
+                runtime,
+                dict_repr(additional_run_info)
+            )
         )
         return status, cost, runtime, additional_run_info
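
A quick worked example of the new zero-budget fallback, using the default pipeline options shown earlier. Note that the fallback takes the smaller of the two configured budgets regardless of budget_type, which is why both keys are read with an np.inf fallback.

import numpy as np

# Values from the default pipeline config above.
pipeline_config = {'epochs': 50, 'runtime': 3600}

epochs_budget = pipeline_config.get('epochs', np.inf)
runtime_budget = pipeline_config.get('runtime', np.inf)

# A zero budget from SMAC resolves to min(50, 3600) == 50, i.e. 50 epochs.
print(min(epochs_budget, runtime_budget))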

autoPyTorch/evaluation/train_evaluator.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
     fit_and_suppress_warnings
 )
 from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric
-from autoPyTorch.utils.common import subsampler
+from autoPyTorch.utils.common import dict_repr, subsampler
 from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates

 __all__ = ['TrainEvaluator', 'eval_function']
@@ -172,11 +172,11 @@ def fit_predict_and_loss(self) -> None:

         status = StatusType.SUCCESS

-        self.logger.debug("In train evaluator fit_predict_and_loss, num_run: {} loss:{},"
-                          " additional run info:{}, status: {}".format(self.num_run,
-                                                                       loss,
-                                                                       additional_run_info,
-                                                                       status))
+        self.logger.debug("In train evaluator.fit_predict_and_loss, num_run: {} loss:{},"
+                          " status: {},\nadditional run info:\n{}".format(self.num_run,
+                                                                          loss,
+                                                                          status,
+                                                                          dict_repr(additional_run_info)))
         self.finish_up(
             loss=loss,
             train_loss=train_loss,
