@@ -53,23 +53,23 @@ class TabularRegressionTask(BaseTask):
53
53
"""
54
54
55
55
def __init__ (
56
- self ,
57
- seed : int = 1 ,
58
- n_jobs : int = 1 ,
59
- logging_config : Optional [Dict ] = None ,
60
- ensemble_size : int = 50 ,
61
- ensemble_nbest : int = 50 ,
62
- max_models_on_disc : int = 50 ,
63
- temporary_directory : Optional [str ] = None ,
64
- output_directory : Optional [str ] = None ,
65
- delete_tmp_folder_after_terminate : bool = True ,
66
- delete_output_folder_after_terminate : bool = True ,
67
- include_components : Optional [Dict ] = None ,
68
- exclude_components : Optional [Dict ] = None ,
69
- resampling_strategy : Union [CrossValTypes , HoldoutValTypes ] = HoldoutValTypes .holdout_validation ,
70
- resampling_strategy_args : Optional [Dict [str , Any ]] = None ,
71
- backend : Optional [Backend ] = None ,
72
- search_space_updates : Optional [HyperparameterSearchSpaceUpdates ] = None
56
+ self ,
57
+ seed : int = 1 ,
58
+ n_jobs : int = 1 ,
59
+ logging_config : Optional [Dict ] = None ,
60
+ ensemble_size : int = 50 ,
61
+ ensemble_nbest : int = 50 ,
62
+ max_models_on_disc : int = 50 ,
63
+ temporary_directory : Optional [str ] = None ,
64
+ output_directory : Optional [str ] = None ,
65
+ delete_tmp_folder_after_terminate : bool = True ,
66
+ delete_output_folder_after_terminate : bool = True ,
67
+ include_components : Optional [Dict ] = None ,
68
+ exclude_components : Optional [Dict ] = None ,
69
+ resampling_strategy : Union [CrossValTypes , HoldoutValTypes ] = HoldoutValTypes .holdout_validation ,
70
+ resampling_strategy_args : Optional [Dict [str , Any ]] = None ,
71
+ backend : Optional [Backend ] = None ,
72
+ search_space_updates : Optional [HyperparameterSearchSpaceUpdates ] = None
73
73
):
74
74
super ().__init__ (
75
75
seed = seed ,
@@ -102,8 +102,9 @@ def search(
102
102
X_test : Optional [Union [List , pd .DataFrame , np .ndarray ]] = None ,
103
103
y_test : Optional [Union [List , pd .DataFrame , np .ndarray ]] = None ,
104
104
dataset_name : Optional [str ] = None ,
105
- budget_type : Optional [str ] = None ,
106
- budget : Optional [float ] = None ,
105
+ budget_type : str = 'epochs' ,
106
+ min_budget : int = 5 ,
107
+ max_budget : int = 50 ,
107
108
total_walltime_limit : int = 100 ,
108
109
func_eval_time_limit_secs : Optional [int ] = None ,
109
110
enable_traditional_pipeline : bool = True ,
@@ -129,13 +130,36 @@ def search(
129
130
be provided to track the generalization performance of each stage.
130
131
optimize_metric (str): name of the metric that is used to
131
132
evaluate a pipeline.
132
- budget_type (Optional[ str] ):
133
+ budget_type (str):
133
134
Type of budget to be used when fitting the pipeline.
134
- Either 'epochs' or 'runtime'. If not provided, uses
135
- the default in the pipeline config ('epochs')
136
- budget (Optional[float]):
137
- Budget to fit a single run of the pipeline. If not
138
- provided, uses the default in the pipeline config
135
+ It can be one of:
136
+ + 'epochs': The training of each pipeline will be terminated after
137
+ a number of epochs have passed. This number of epochs is determined by the
138
+ min_budget/max_budget arguments of this method.
139
+ + 'runtime': The training of each pipeline will be terminated after
140
+ a number of seconds have passed. This number of seconds is determined by the
141
+ min_budget/max_budget arguments of this method. The overall fitting time of a pipeline is
142
+ controlled by func_eval_time_limit_secs. 'runtime' only controls the allocated
143
+ time to train a pipeline, but it does not consider the overall time it takes
144
+ to create a pipeline (data loading and preprocessing, other i/o operations, etc.).
145
+ budget_type will determine the units of min_budget/max_budget. If budget_type=='epochs'
146
+ is used, min_budget will refer to epochs whereas if budget_type=='runtime' then
147
+ min_budget will refer to seconds.
148
+ min_budget (int):
149
+ Auto-PyTorch uses `Hyperband <https://arxiv.org/abs/1603.06560>`_ to
150
+ trade-off resources between running many pipelines at min_budget and
151
+ running the top performing pipelines on max_budget.
152
+ min_budget states the minimum resource allocation a pipeline should have
153
+ so that we can compare and quickly discard bad performing models.
154
+ For example, if the budget_type is epochs, and min_budget=5, then we will
155
+ run every pipeline to a minimum of 5 epochs before performance comparison.
156
+ max_budget (int):
157
+ Auto-PyTorch uses `Hyperband <https://arxiv.org/abs/1603.06560>`_ to
158
+ trade-off resources between running many pipelines at min_budget and
159
+ running the top performing pipelines on max_budget.
160
+ max_budget states the maximum resource allocation a pipeline is going to
161
+ be run with. For example, if the budget_type is epochs, and max_budget=50,
162
+ then the pipeline training will be terminated after 50 epochs.
139
163
total_walltime_limit (int), (default=100): Time limit
140
164
in seconds for the search of appropriate models.
141
165
By increasing this value, autopytorch has a higher
@@ -227,7 +251,8 @@ def search(
227
251
dataset = self .dataset ,
228
252
optimize_metric = optimize_metric ,
229
253
budget_type = budget_type ,
230
- budget = budget ,
254
+ min_budget = min_budget ,
255
+ max_budget = max_budget ,
231
256
total_walltime_limit = total_walltime_limit ,
232
257
func_eval_time_limit_secs = func_eval_time_limit_secs ,
233
258
enable_traditional_pipeline = enable_traditional_pipeline ,
0 commit comments