Skip to content

Commit e8cb0ba

Browse files
committed
Accept black reformatting
1 parent 2e4c8e7 commit e8cb0ba

File tree

191 files changed

+11884
-7871
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

191 files changed

+11884
-7871
lines changed

autoPyTorch/api/base_task.py

Lines changed: 315 additions & 202 deletions
Large diffs are not rendered by default.

autoPyTorch/api/tabular_classification.py

Lines changed: 37 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
)
2020
from autoPyTorch.datasets.tabular_dataset import TabularDataset
2121
from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline
22-
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
22+
from autoPyTorch.utils.hyperparameter_search_space_update import (
23+
HyperparameterSearchSpaceUpdates,
24+
)
2325

2426

2527
class TabularClassificationTask(BaseTask):
@@ -57,6 +59,7 @@ class TabularClassificationTask(BaseTask):
5759
specifies set of components not to use. Incompatible
5860
with include components
5961
"""
62+
6063
def __init__(
6164
self,
6265
seed: int = 1,
@@ -71,10 +74,12 @@ def __init__(
7174
delete_output_folder_after_terminate: bool = True,
7275
include_components: Optional[Dict] = None,
7376
exclude_components: Optional[Dict] = None,
74-
resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
77+
resampling_strategy: Union[
78+
CrossValTypes, HoldoutValTypes
79+
] = HoldoutValTypes.holdout_validation,
7580
resampling_strategy_args: Optional[Dict[str, Any]] = None,
7681
backend: Optional[Backend] = None,
77-
search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
82+
search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None,
7883
):
7984
super().__init__(
8085
seed=seed,
@@ -96,7 +101,9 @@ def __init__(
96101
task_type=TASK_TYPES_TO_STRING[TABULAR_CLASSIFICATION],
97102
)
98103

99-
def build_pipeline(self, dataset_properties: Dict[str, Any]) -> TabularClassificationPipeline:
104+
def build_pipeline(
105+
self, dataset_properties: Dict[str, Any]
106+
) -> TabularClassificationPipeline:
100107
return TabularClassificationPipeline(dataset_properties=dataset_properties)
101108

102109
def search(
@@ -119,7 +126,7 @@ def search(
119126
precision: int = 32,
120127
disable_file_output: List = [],
121128
load_models: bool = True,
122-
) -> 'BaseTask':
129+
) -> "BaseTask":
123130
"""
124131
Search for the best pipeline configuration for the given dataset.
125132
@@ -208,11 +215,15 @@ def search(
208215
# Fit an input validator to check the provided data
209216
# Also, an encoder is fit to both train and test data,
210217
# to prevent unseen categories during inference
211-
self.InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
218+
self.InputValidator.fit(
219+
X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test
220+
)
212221

213222
self.dataset = TabularDataset(
214-
X=X_train, Y=y_train,
215-
X_test=X_test, Y_test=y_test,
223+
X=X_train,
224+
Y=y_train,
225+
X_test=X_test,
226+
Y_test=y_test,
216227
validator=self.InputValidator,
217228
resampling_strategy=self.resampling_strategy,
218229
resampling_strategy_args=self.resampling_strategy_args,
@@ -236,18 +247,18 @@ def search(
236247
)
237248

238249
def predict(
239-
self,
240-
X_test: np.ndarray,
241-
batch_size: Optional[int] = None,
242-
n_jobs: int = 1
250+
self, X_test: np.ndarray, batch_size: Optional[int] = None, n_jobs: int = 1
243251
) -> np.ndarray:
244252
if self.InputValidator is None or not self.InputValidator._is_fitted:
245-
raise ValueError("predict() is only supported after calling search. Kindly call first "
246-
"the estimator fit() method.")
253+
raise ValueError(
254+
"predict() is only supported after calling search. Kindly call first "
255+
"the estimator fit() method."
256+
)
247257

248258
X_test = self.InputValidator.feature_validator.transform(X_test)
249-
predicted_probabilities = super().predict(X_test, batch_size=batch_size,
250-
n_jobs=n_jobs)
259+
predicted_probabilities = super().predict(
260+
X_test, batch_size=batch_size, n_jobs=n_jobs
261+
)
251262

252263
if self.InputValidator.target_validator.is_single_column_target():
253264
predicted_indexes = np.argmax(predicted_probabilities, axis=1)
@@ -258,11 +269,16 @@ def predict(
258269
# in our encoded values
259270
return self.InputValidator.target_validator.inverse_transform(predicted_indexes)
260271

261-
def predict_proba(self,
262-
X_test: Union[np.ndarray, pd.DataFrame, List],
263-
batch_size: Optional[int] = None, n_jobs: int = 1) -> np.ndarray:
272+
def predict_proba(
273+
self,
274+
X_test: Union[np.ndarray, pd.DataFrame, List],
275+
batch_size: Optional[int] = None,
276+
n_jobs: int = 1,
277+
) -> np.ndarray:
264278
if self.InputValidator is None or not self.InputValidator._is_fitted:
265-
raise ValueError("predict() is only supported after calling search. Kindly call first "
266-
"the estimator fit() method.")
279+
raise ValueError(
280+
"predict() is only supported after calling search. Kindly call first "
281+
"the estimator fit() method."
282+
)
267283
X_test = self.InputValidator.feature_validator.transform(X_test)
268284
return super().predict(X_test, batch_size=batch_size, n_jobs=n_jobs)

autoPyTorch/api/tabular_regression.py

Lines changed: 40 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,17 @@
88

99
from autoPyTorch.api.base_task import BaseTask
1010
from autoPyTorch.automl_common.common.utils.backend import Backend
11-
from autoPyTorch.constants import (
12-
TABULAR_REGRESSION,
13-
TASK_TYPES_TO_STRING
14-
)
11+
from autoPyTorch.constants import TABULAR_REGRESSION, TASK_TYPES_TO_STRING
1512
from autoPyTorch.data.tabular_validator import TabularInputValidator
1613
from autoPyTorch.datasets.resampling_strategy import (
1714
CrossValTypes,
1815
HoldoutValTypes,
1916
)
2017
from autoPyTorch.datasets.tabular_dataset import TabularDataset
2118
from autoPyTorch.pipeline.tabular_regression import TabularRegressionPipeline
22-
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
19+
from autoPyTorch.utils.hyperparameter_search_space_update import (
20+
HyperparameterSearchSpaceUpdates,
21+
)
2322

2423

2524
class TabularRegressionTask(BaseTask):
@@ -50,23 +49,25 @@ class TabularRegressionTask(BaseTask):
5049
"""
5150

5251
def __init__(
53-
self,
54-
seed: int = 1,
55-
n_jobs: int = 1,
56-
logging_config: Optional[Dict] = None,
57-
ensemble_size: int = 50,
58-
ensemble_nbest: int = 50,
59-
max_models_on_disc: int = 50,
60-
temporary_directory: Optional[str] = None,
61-
output_directory: Optional[str] = None,
62-
delete_tmp_folder_after_terminate: bool = True,
63-
delete_output_folder_after_terminate: bool = True,
64-
include_components: Optional[Dict] = None,
65-
exclude_components: Optional[Dict] = None,
66-
resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
67-
resampling_strategy_args: Optional[Dict[str, Any]] = None,
68-
backend: Optional[Backend] = None,
69-
search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
52+
self,
53+
seed: int = 1,
54+
n_jobs: int = 1,
55+
logging_config: Optional[Dict] = None,
56+
ensemble_size: int = 50,
57+
ensemble_nbest: int = 50,
58+
max_models_on_disc: int = 50,
59+
temporary_directory: Optional[str] = None,
60+
output_directory: Optional[str] = None,
61+
delete_tmp_folder_after_terminate: bool = True,
62+
delete_output_folder_after_terminate: bool = True,
63+
include_components: Optional[Dict] = None,
64+
exclude_components: Optional[Dict] = None,
65+
resampling_strategy: Union[
66+
CrossValTypes, HoldoutValTypes
67+
] = HoldoutValTypes.holdout_validation,
68+
resampling_strategy_args: Optional[Dict[str, Any]] = None,
69+
backend: Optional[Backend] = None,
70+
search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None,
7071
):
7172
super().__init__(
7273
seed=seed,
@@ -88,7 +89,9 @@ def __init__(
8889
task_type=TASK_TYPES_TO_STRING[TABULAR_REGRESSION],
8990
)
9091

91-
def build_pipeline(self, dataset_properties: Dict[str, Any]) -> TabularRegressionPipeline:
92+
def build_pipeline(
93+
self, dataset_properties: Dict[str, Any]
94+
) -> TabularRegressionPipeline:
9295
return TabularRegressionPipeline(dataset_properties=dataset_properties)
9396

9497
def search(
@@ -111,7 +114,7 @@ def search(
111114
precision: int = 32,
112115
disable_file_output: List = [],
113116
load_models: bool = True,
114-
) -> 'BaseTask':
117+
) -> "BaseTask":
115118
"""
116119
Search for the best pipeline configuration for the given dataset.
117120
@@ -196,11 +199,15 @@ def search(
196199
# Fit an input validator to check the provided data
197200
# Also, an encoder is fit to both train and test data,
198201
# to prevent unseen categories during inference
199-
self.InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
202+
self.InputValidator.fit(
203+
X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test
204+
)
200205

201206
self.dataset = TabularDataset(
202-
X=X_train, Y=y_train,
203-
X_test=X_test, Y_test=y_test,
207+
X=X_train,
208+
Y=y_train,
209+
X_test=X_test,
210+
Y_test=y_test,
204211
validator=self.InputValidator,
205212
resampling_strategy=self.resampling_strategy,
206213
resampling_strategy_args=self.resampling_strategy_args,
@@ -224,18 +231,16 @@ def search(
224231
)
225232

226233
def predict(
227-
self,
228-
X_test: np.ndarray,
229-
batch_size: Optional[int] = None,
230-
n_jobs: int = 1
234+
self, X_test: np.ndarray, batch_size: Optional[int] = None, n_jobs: int = 1
231235
) -> np.ndarray:
232236
if self.InputValidator is None or not self.InputValidator._is_fitted:
233-
raise ValueError("predict() is only supported after calling search. Kindly call first "
234-
"the estimator fit() method.")
237+
raise ValueError(
238+
"predict() is only supported after calling search. Kindly call first "
239+
"the estimator fit() method."
240+
)
235241

236242
X_test = self.InputValidator.feature_validator.transform(X_test)
237-
predicted_values = super().predict(X_test, batch_size=batch_size,
238-
n_jobs=n_jobs)
243+
predicted_values = super().predict(X_test, batch_size=batch_size, n_jobs=n_jobs)
239244

240245
# Allow to predict in the original domain -- that is, the user is not interested
241246
# in our encoded values

autoPyTorch/constants.py

Lines changed: 37 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,27 +6,33 @@
66
TIMESERIES_REGRESSION = 6
77

88
REGRESSION_TASKS = [TABULAR_REGRESSION, IMAGE_REGRESSION, TIMESERIES_REGRESSION]
9-
CLASSIFICATION_TASKS = [TABULAR_CLASSIFICATION, IMAGE_CLASSIFICATION, TIMESERIES_CLASSIFICATION]
9+
CLASSIFICATION_TASKS = [
10+
TABULAR_CLASSIFICATION,
11+
IMAGE_CLASSIFICATION,
12+
TIMESERIES_CLASSIFICATION,
13+
]
1014

1115
TABULAR_TASKS = [TABULAR_CLASSIFICATION, TABULAR_REGRESSION]
1216
IMAGE_TASKS = [IMAGE_CLASSIFICATION, IMAGE_REGRESSION]
1317
TASK_TYPES = REGRESSION_TASKS + CLASSIFICATION_TASKS
1418

15-
TASK_TYPES_TO_STRING = \
16-
{TABULAR_CLASSIFICATION: 'tabular_classification',
17-
IMAGE_CLASSIFICATION: 'image_classification',
18-
TABULAR_REGRESSION: 'tabular_regression',
19-
IMAGE_REGRESSION: 'image_regression',
20-
TIMESERIES_CLASSIFICATION: 'time_series_classification',
21-
TIMESERIES_REGRESSION: 'time_series_regression'}
22-
23-
STRING_TO_TASK_TYPES = \
24-
{'tabular_classification': TABULAR_CLASSIFICATION,
25-
'image_classification': IMAGE_CLASSIFICATION,
26-
'tabular_regression': TABULAR_REGRESSION,
27-
'image_regression': IMAGE_REGRESSION,
28-
'time_series_classification': TIMESERIES_CLASSIFICATION,
29-
'time_series_regression': TIMESERIES_REGRESSION}
19+
TASK_TYPES_TO_STRING = {
20+
TABULAR_CLASSIFICATION: "tabular_classification",
21+
IMAGE_CLASSIFICATION: "image_classification",
22+
TABULAR_REGRESSION: "tabular_regression",
23+
IMAGE_REGRESSION: "image_regression",
24+
TIMESERIES_CLASSIFICATION: "time_series_classification",
25+
TIMESERIES_REGRESSION: "time_series_regression",
26+
}
27+
28+
STRING_TO_TASK_TYPES = {
29+
"tabular_classification": TABULAR_CLASSIFICATION,
30+
"image_classification": IMAGE_CLASSIFICATION,
31+
"tabular_regression": TABULAR_REGRESSION,
32+
"image_regression": IMAGE_REGRESSION,
33+
"time_series_classification": TIMESERIES_CLASSIFICATION,
34+
"time_series_regression": TIMESERIES_REGRESSION,
35+
}
3036

3137
# Output types have been defined as in scikit-learn type_of_target
3238
# (https://scikit-learn.org/stable/modules/generated/sklearn.utils.multiclass.type_of_target.html)
@@ -38,19 +44,21 @@
3844

3945
OUTPUT_TYPES = [BINARY, CONTINUOUSMULTIOUTPUT, MULTICLASS, CONTINUOUS]
4046

41-
OUTPUT_TYPES_TO_STRING = \
42-
{BINARY: 'binary',
43-
CONTINUOUSMULTIOUTPUT: 'continuous-multioutput',
44-
MULTICLASS: 'multiclass',
45-
CONTINUOUS: 'continuous',
46-
MULTICLASSMULTIOUTPUT: 'multiclass-multioutput'}
47-
48-
STRING_TO_OUTPUT_TYPES = \
49-
{'binary': BINARY,
50-
'continuous-multioutput': CONTINUOUSMULTIOUTPUT,
51-
'multiclass': MULTICLASS,
52-
'continuous': CONTINUOUS,
53-
'multiclass-multioutput': MULTICLASSMULTIOUTPUT}
47+
OUTPUT_TYPES_TO_STRING = {
48+
BINARY: "binary",
49+
CONTINUOUSMULTIOUTPUT: "continuous-multioutput",
50+
MULTICLASS: "multiclass",
51+
CONTINUOUS: "continuous",
52+
MULTICLASSMULTIOUTPUT: "multiclass-multioutput",
53+
}
54+
55+
STRING_TO_OUTPUT_TYPES = {
56+
"binary": BINARY,
57+
"continuous-multioutput": CONTINUOUSMULTIOUTPUT,
58+
"multiclass": MULTICLASS,
59+
"continuous": CONTINUOUS,
60+
"multiclass-multioutput": MULTICLASSMULTIOUTPUT,
61+
}
5462

5563
CLASSIFICATION_OUTPUTS = [BINARY, MULTICLASS, MULTICLASSMULTIOUTPUT]
5664
REGRESSION_OUTPUTS = [CONTINUOUS, CONTINUOUSMULTIOUTPUT]

autoPyTorch/data/base_feature_validator.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,13 @@ class BaseFeatureValidator(BaseEstimator):
4141
enc_columns (typing.List[str])
4242
List of columns that were encoded.
4343
"""
44-
def __init__(self,
45-
logger: typing.Optional[typing.Union[PicklableClientLogger, logging.Logger
46-
]] = None,
47-
) -> None:
44+
45+
def __init__(
46+
self,
47+
logger: typing.Optional[
48+
typing.Union[PicklableClientLogger, logging.Logger]
49+
] = None,
50+
) -> None:
4851
# Register types to detect unsupported data format changes
4952
self.feat_type = None # type: typing.Optional[typing.List[str]]
5053
self.data_type = None # type: typing.Optional[type]
@@ -54,9 +57,9 @@ def __init__(self,
5457
self.encoder = None # type: typing.Optional[BaseEstimator]
5558
self.enc_columns = [] # type: typing.List[str]
5659

57-
self.logger: typing.Union[
58-
PicklableClientLogger, logging.Logger
59-
] = logger if logger is not None else logging.getLogger(__name__)
60+
self.logger: typing.Union[PicklableClientLogger, logging.Logger] = (
61+
logger if logger is not None else logging.getLogger(__name__)
62+
)
6063

6164
# Required for dataset properties
6265
self.num_features = None # type: typing.Optional[int]
@@ -94,11 +97,12 @@ def fit(
9497
self._check_data(X_test)
9598

9699
if np.shape(X_train)[1] != np.shape(X_test)[1]:
97-
raise ValueError("The feature dimensionality of the train and test "
98-
"data does not match train({}) != test({})".format(
99-
np.shape(X_train)[1],
100-
np.shape(X_test)[1]
101-
))
100+
raise ValueError(
101+
"The feature dimensionality of the train and test "
102+
"data does not match train({}) != test({})".format(
103+
np.shape(X_train)[1], np.shape(X_test)[1]
104+
)
105+
)
102106

103107
# Fit on the training data
104108
self._fit(X_train)

0 commit comments

Comments
 (0)