-
Notifications
You must be signed in to change notification settings - Fork 301
Feature preprocessors, Loss strategies #86
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
franchuterivera
merged 27 commits into
automl:refactor_development
from
ravinkohli:missing_components
Feb 9, 2021
Merged
Changes from 5 commits
Commits
Show all changes
27 commits
Select commit
Hold shift + click to select a range
cc583c1
ADD Weighted loss
ravinkohli 2ea059c
Now?
ravinkohli 14795cc
Merge branch 'feature_preprocessing' into missing_components
ravinkohli 9f0ed18
Fix tests, flake, mypy
ravinkohli fb23cef
Fix tests
ravinkohli a8ea7b5
Fix mypy
ravinkohli 8a618ff
change back sklearn requirement
ravinkohli 8a389b2
Assert for fast ica sklearn bug
ravinkohli ce1778b
Forgot to add skip
ravinkohli 0b1f3f0
Fix tests, changed num only data to float
ravinkohli 0795b44
removed fast ica
ravinkohli bf69120
change num only dataset
ravinkohli e7d8606
Increased number of features in num only
ravinkohli 8a95f61
Increase timeout for pytest
ravinkohli 0a2d74f
ADD tensorboard to requirement
ravinkohli 36b2c22
Fix bug with small_preprocess
ravinkohli d222826
Fix bug in pytest execution
ravinkohli 90fdcfe
Fix tests
ravinkohli df9ec6e
ADD error is raised if default not in include
ravinkohli 95378f7
Added dynamic search space for deciding n components in feature prepr…
ravinkohli 0c88cab
Moved back to random configs in tabular test
ravinkohli 3ec87b1
Added floor and ceil and handling of logs
ravinkohli 6546d5c
Fix flake
ravinkohli 9388c32
Remove TruncatedSVD from cs if num numerical ==1
ravinkohli b6c2cd0
ADD flakyness to network accuracy test
ravinkohli 18f79e2
fix flake
ravinkohli e974969
remove cla to pytest
ravinkohli File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
68 changes: 68 additions & 0 deletions
68
.../pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/FastICA.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
from typing import Any, Dict, Optional, Tuple, Union | ||
|
||
from ConfigSpace.conditions import EqualsCondition | ||
from ConfigSpace.configuration_space import ConfigurationSpace | ||
from ConfigSpace.hyperparameters import ( | ||
CategoricalHyperparameter, | ||
UniformIntegerHyperparameter, | ||
) | ||
|
||
import numpy as np | ||
|
||
import sklearn.decomposition | ||
from sklearn.base import BaseEstimator | ||
|
||
from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing.\ | ||
base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent | ||
|
||
|
||
class FastICA(autoPyTorchFeaturePreprocessingComponent):
    """Feature preprocessor wrapping ``sklearn.decomposition.FastICA``.

    The scikit-learn estimator is created in :meth:`fit` and stored under the
    ``'numerical'`` key of ``self.preprocessor``.
    NOTE(review): ``self.preprocessor`` is presumably initialised by the base
    class constructor -- confirm in ``base_feature_preprocessor``.

    Args:
        n_components (int): forwarded to scikit-learn as ``n_components``.
        algorithm (str): forwarded as ``algorithm`` ('parallel' or 'deflation'
            in the default search space).
        whiten (bool): forwarded as ``whiten``.
        fun (str): forwarded as ``fun`` ('logcosh', 'exp' or 'cube' in the
            default search space).
        random_state (Optional[Union[int, np.random.RandomState]]): forwarded
            as ``random_state``.
    """

    def __init__(self, n_components: int = 100,
                 algorithm: str = 'parallel',
                 whiten: bool = False,
                 fun: str = 'logcosh',
                 random_state: Optional[Union[int, np.random.RandomState]] = None
                 ) -> None:
        self.n_components = n_components
        self.algorithm = algorithm
        self.whiten = whiten
        self.fun = fun
        self.random_state = random_state

        super().__init__()

    def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
        """Instantiate the scikit-learn FastICA estimator for numerical columns.

        The estimator is only constructed here, not fitted on data.

        Args:
            X (Dict[str, Any]): fit dictionary (not read by this method).
            y (Any): unused.

        Returns:
            instance of self
        """
        self.preprocessor['numerical'] = sklearn.decomposition.FastICA(
            n_components=self.n_components, algorithm=self.algorithm,
            fun=self.fun, whiten=self.whiten, random_state=self.random_state
        )

        return self

    @staticmethod
    def get_hyperparameter_search_space(
        dataset_properties: Optional[Dict[str, str]] = None,
        n_components: Tuple[Tuple, int] = ((10, 2000), 100),
        algorithm: Tuple[Tuple, str] = (('parallel', 'deflation'), 'parallel'),
        whiten: Tuple[Tuple, bool] = ((True, False), False),
        fun: Tuple[Tuple, str] = (('logcosh', 'exp', 'cube'), 'logcosh')
    ) -> ConfigurationSpace:
        """Build the configuration space of this component.

        Every search-space argument is a tuple ``(choices_or_bounds, default)``.

        Args:
            dataset_properties (Optional[Dict[str, str]]): not used here.
            n_components (Tuple[Tuple, int]): ``((lower, upper), default)``.
            algorithm (Tuple[Tuple, str]): ``(choices, default)``.
            whiten (Tuple[Tuple, bool]): ``(choices, default)``.
            fun (Tuple[Tuple, str]): ``(choices, default)``.

        Returns:
            ConfigurationSpace: space with ``n_components``, ``algorithm``,
                ``whiten`` and ``fun``; ``n_components`` is only active when
                ``whiten`` is True.
        """
        # Distinct local names keep the tuple-typed arguments from being
        # rebound to hyperparameter objects.
        n_components_hp = UniformIntegerHyperparameter(
            "n_components", lower=n_components[0][0], upper=n_components[0][1], default_value=n_components[1])
        algorithm_hp = CategoricalHyperparameter('algorithm', choices=algorithm[0], default_value=algorithm[1])
        whiten_hp = CategoricalHyperparameter('whiten', choices=whiten[0], default_value=whiten[1])
        fun_hp = CategoricalHyperparameter('fun', choices=fun[0], default_value=fun[1])

        cs = ConfigurationSpace()
        cs.add_hyperparameters([n_components_hp, algorithm_hp, whiten_hp, fun_hp])

        # n_components is only searched over when whitening is enabled.
        cs.add_condition(EqualsCondition(n_components_hp, whiten_hp, True))

        return cs

    @staticmethod
    def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
        """Return the static description of this component.

        Args:
            dataset_properties (Optional[Dict[str, str]]): not used here.

        Returns:
            Dict[str, Any]: short name, full name and sparse-input support.
        """
        return {'shortname': 'FastICA',
                'name': 'Fast Independent Component Analysis',
                # scikit-learn's FastICA validates its input as a dense
                # array and raises on scipy sparse matrices.
                'handles_sparse': False
                }
100 changes: 100 additions & 0 deletions
100
...ipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
from typing import Any, Dict, Optional, Tuple, Union | ||
|
||
from ConfigSpace.conditions import EqualsCondition, InCondition | ||
from ConfigSpace.configuration_space import ConfigurationSpace | ||
from ConfigSpace.hyperparameters import ( | ||
CategoricalHyperparameter, | ||
UniformFloatHyperparameter, | ||
UniformIntegerHyperparameter, | ||
) | ||
|
||
import numpy as np | ||
|
||
import sklearn.decomposition | ||
from sklearn.base import BaseEstimator | ||
|
||
from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing.\ | ||
base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent | ||
from autoPyTorch.utils.common import FitRequirement | ||
|
||
|
||
class KernelPCA(autoPyTorchFeaturePreprocessingComponent):
    """Feature preprocessor wrapping ``sklearn.decomposition.KernelPCA``.

    The scikit-learn estimator is created in :meth:`fit` and stored under the
    ``'numerical'`` key of ``self.preprocessor``. The constructor additionally
    registers an ``'issparse'`` fit requirement.

    Args:
        n_components (int): forwarded to scikit-learn as ``n_components``.
        kernel (str): forwarded as ``kernel``.
        degree (int): forwarded as ``degree``.
        gamma (float): forwarded as ``gamma``.
        coef0 (float): forwarded as ``coef0``.
        random_state (Optional[Union[int, np.random.RandomState]]): forwarded
            as ``random_state``.
    """

    def __init__(self, n_components: int = 100,
                 kernel: str = 'rbf', degree: int = 3,
                 gamma: float = 0.01, coef0: float = 0.0,
                 random_state: Optional[Union[int, np.random.RandomState]] = None
                 ) -> None:
        self.n_components = n_components
        self.kernel = kernel
        self.degree = degree
        self.gamma = gamma
        self.coef0 = coef0
        self.random_state = random_state
        super().__init__()

        issparse_requirement = FitRequirement(
            'issparse', (bool,), user_defined=True, dataset_property=True)
        self.add_fit_requirements([issparse_requirement])

    def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
        """Instantiate the scikit-learn KernelPCA estimator for numerical columns.

        The estimator is only constructed here, not fitted on data.

        Args:
            X (Dict[str, Any]): fit dictionary (not read by this method).
            y (Any): unused.

        Returns:
            instance of self
        """
        estimator = sklearn.decomposition.KernelPCA(
            n_components=self.n_components,
            kernel=self.kernel,
            degree=self.degree,
            gamma=self.gamma,
            coef0=self.coef0,
            remove_zero_eig=True,
            random_state=self.random_state,
        )
        self.preprocessor['numerical'] = estimator

        return self

    @staticmethod
    def get_hyperparameter_search_space(
        dataset_properties: Optional[Dict[str, str]] = None,
        n_components: Tuple[Tuple, int] = ((10, 2000), 100),
        kernel: Tuple[Tuple, str] = (('poly', 'rbf', 'sigmoid', 'cosine'), 'rbf'),
        gamma: Tuple[Tuple, float, bool] = ((3.0517578125e-05, 8), 0.01, True),
        degree: Tuple[Tuple, int] = ((2, 5), 3),
        coef0: Tuple[Tuple, float] = ((-1, 1), 0)
    ) -> ConfigurationSpace:
        """Build the configuration space of this component.

        Search-space arguments are tuples ``(choices_or_bounds, default)``;
        ``gamma`` carries a third element selecting log-scale sampling.

        Args:
            dataset_properties (Optional[Dict[str, str]]): not used here.
            n_components (Tuple[Tuple, int]): ``((lower, upper), default)``.
            kernel (Tuple[Tuple, str]): ``(choices, default)``.
            gamma (Tuple[Tuple, float, bool]): ``((lower, upper), default, log)``.
            degree (Tuple[Tuple, int]): ``((lower, upper), default)``; only
                added when 'poly' is among the kernel choices.
            coef0 (Tuple[Tuple, float]): ``((lower, upper), default)``.

        Returns:
            ConfigurationSpace: space in which ``degree`` is active for the
                'poly' kernel, ``coef0`` for 'sigmoid'/'poly' and ``gamma``
                for 'rbf'/'poly'.
        """
        n_components_hp = UniformIntegerHyperparameter(
            "n_components",
            lower=n_components[0][0],
            upper=n_components[0][1],
            default_value=n_components[1],
        )
        kernel_hp = CategoricalHyperparameter('kernel', choices=kernel[0], default_value=kernel[1])
        gamma_hp = UniformFloatHyperparameter(
            "gamma",
            lower=gamma[0][0],
            upper=gamma[0][1],
            log=gamma[2],
            default_value=gamma[1],
        )
        coef0_hp = UniformFloatHyperparameter(
            "coef0",
            lower=coef0[0][0],
            upper=coef0[0][1],
            default_value=coef0[1],
        )

        cs = ConfigurationSpace()
        cs.add_hyperparameters([n_components_hp, kernel_hp, gamma_hp, coef0_hp])

        if "poly" in kernel_hp.choices:
            # The polynomial degree is only meaningful for the 'poly' kernel.
            degree_hp = UniformIntegerHyperparameter(
                'degree',
                lower=degree[0][0],
                upper=degree[0][1],
                default_value=degree[1],
            )
            cs.add_hyperparameters([degree_hp])
            cs.add_conditions([EqualsCondition(degree_hp, kernel_hp, "poly")])

        # Restrict each condition to the kernels actually offered as choices.
        coef0_kernels = [name for name in ("sigmoid", "poly") if name in kernel_hp.choices]
        coef0_condition = InCondition(coef0_hp, kernel_hp, coef0_kernels)

        gamma_kernels = [name for name in ("rbf", "poly") if name in kernel_hp.choices]
        gamma_condition = InCondition(gamma_hp, kernel_hp, gamma_kernels)

        cs.add_conditions([coef0_condition, gamma_condition])
        return cs

    @staticmethod
    def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
        """Return the static description of this component.

        Args:
            dataset_properties (Optional[Dict[str, str]]): not used here.

        Returns:
            Dict[str, Any]: short name, full name and sparse-input support.
        """
        properties: Dict[str, Any] = {
            'shortname': 'KernelPCA',
            'name': 'Kernel Principal Component Analysis',
            'handles_sparse': True,
        }
        return properties
52 changes: 52 additions & 0 deletions
52
...onents/preprocessing/tabular_preprocessing/feature_preprocessing/NoFeaturePreprocessor.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
from typing import Any, Dict, Optional, Union | ||
|
||
import numpy as np | ||
|
||
from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing.\ | ||
base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent | ||
|
||
|
||
class NoFeaturePreprocessor(autoPyTorchFeaturePreprocessingComponent):
    """
    Component that creates no feature preprocessor of its own.

    ``fit`` only validates the fit requirements and ``transform`` forwards
    ``self.preprocessor`` unchanged.
    NOTE(review): ``self.preprocessor`` is presumably the default container
    set up by the base class -- confirm in ``base_feature_preprocessor``.
    """
    def __init__(self,
                 random_state: Optional[Union[np.random.RandomState, int]] = None
                 ) -> None:
        super().__init__()
        self.random_state = random_state

    def fit(self, X: Dict[str, Any], y: Any = None) -> autoPyTorchFeaturePreprocessingComponent:
        """
        Checks the fit requirements against 'X'; no estimator is created.
        Args:
            X (Dict[str, Any]): fit dictionary
            y (Any): passed through to the requirement check

        Returns:
            instance of self
        """
        self.check_requirements(X, y)
        return self

    def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
        """
        Stores this component's preprocessor in the 'X' dictionary under the
        'feature_preprocessor' key and returns the same dictionary.
        Args:
            X (Dict[str, Any]): 'X' dictionary

        Returns:
            (Dict[str, Any]): the updated 'X' dictionary
        """
        entry = {'feature_preprocessor': self.preprocessor}
        X.update(entry)
        return X

    @staticmethod
    def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, Union[str, bool]]:
        """
        Static description of this component.
        Args:
            dataset_properties (Optional[Dict[str, Any]]): not used here

        Returns:
            (Dict[str, Union[str, bool]]): short name, full name and
                sparse-input support
        """
        properties: Dict[str, Union[str, bool]] = {
            'shortname': 'NoFeaturePreprocessing',
            'name': 'No Feature Preprocessing',
            'handles_sparse': True,
        }
        return properties
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.