|
| 1 | +from math import ceil, floor |
| 2 | +from typing import Any, Dict, Optional, Tuple, Union |
| 3 | + |
| 4 | +from ConfigSpace.conditions import EqualsCondition, InCondition |
| 5 | +from ConfigSpace.configuration_space import ConfigurationSpace |
| 6 | +from ConfigSpace.hyperparameters import ( |
| 7 | + CategoricalHyperparameter, |
| 8 | + UniformFloatHyperparameter, |
| 9 | + UniformIntegerHyperparameter, |
| 10 | +) |
| 11 | + |
| 12 | +import numpy as np |
| 13 | + |
| 14 | +import sklearn.decomposition |
| 15 | +from sklearn.base import BaseEstimator |
| 16 | + |
| 17 | +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing.\ |
| 18 | + base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent |
| 19 | +from autoPyTorch.utils.common import FitRequirement |
| 20 | + |
| 21 | + |
class KernelPCA(autoPyTorchFeaturePreprocessingComponent):
    """Feature preprocessor that configures ``sklearn.decomposition.KernelPCA``.

    The constructor arguments are stored as attributes and forwarded to the
    sklearn estimator when :meth:`fit` is called.
    """

    def __init__(self, n_components: int = 10,
                 kernel: str = 'rbf', degree: int = 3,
                 gamma: float = 0.01, coef0: float = 0.0,
                 random_state: Optional[Union[int, np.random.RandomState]] = None
                 ) -> None:
        """Store the KernelPCA settings and register the fit requirements.

        Args:
            n_components: Number of components passed to sklearn's KernelPCA.
            kernel: Kernel name passed to sklearn's KernelPCA.
            degree: Value passed as ``degree`` to sklearn's KernelPCA.
            gamma: Value passed as ``gamma`` to sklearn's KernelPCA.
            coef0: Value passed as ``coef0`` to sklearn's KernelPCA.
            random_state: Seed or ``RandomState`` passed to sklearn's KernelPCA.
        """
        self.n_components = n_components
        self.kernel = kernel
        self.degree = degree
        self.gamma = gamma
        self.coef0 = coef0
        self.random_state = random_state
        super().__init__()

        # 'issparse' must be supplied as a (user defined) dataset property.
        self.add_fit_requirements([
            FitRequirement('issparse', (bool,), user_defined=True, dataset_property=True)])

    def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
        """Create the sklearn KernelPCA object for the numerical columns.

        The estimator is only instantiated here and stored under
        ``self.preprocessor['numerical']``; ``X`` and ``y`` are not read by
        this method.
        NOTE(review): ``self.preprocessor`` is presumably initialised by the
        base class -- confirm.

        Returns:
            This component instance.
        """
        self.preprocessor['numerical'] = sklearn.decomposition.KernelPCA(
            n_components=self.n_components, kernel=self.kernel,
            degree=self.degree, gamma=self.gamma, coef0=self.coef0,
            remove_zero_eig=True, random_state=self.random_state)

        return self

    @staticmethod
    def get_hyperparameter_search_space(
        dataset_properties: Optional[Dict[str, str]] = None,
        n_components: Tuple[Tuple, float] = ((0.5, 0.9), 0.5),
        kernel: Tuple[Tuple, str] = (('poly', 'rbf', 'sigmoid', 'cosine'), 'rbf'),
        gamma: Tuple[Tuple, float, bool] = ((3.0517578125e-05, 8), 0.01, True),
        degree: Tuple[Tuple, int] = ((2, 5), 3),
        coef0: Tuple[Tuple, float] = ((-1, 1), 0)
    ) -> ConfigurationSpace:
        """Build the configuration space for this component.

        Args:
            dataset_properties: When given, ``len(dataset_properties['numerical_columns'])``
                is used as the feature count that scales ``n_components``.
            n_components: ``((low_fraction, high_fraction), default_fraction)``;
                the fractions are multiplied by the feature count. Ignored when
                ``dataset_properties`` is None, in which case the fixed range
                ``(10, 2000)`` with default ``100`` is used.
            kernel: ``(choices, default)`` for the categorical ``kernel``.
            gamma: ``((lower, upper), default, log)`` for ``gamma``.
            degree: ``((lower, upper), default)`` for ``degree``; only added
                when ``'poly'`` is among the kernel choices.
            coef0: ``((lower, upper), default)`` for ``coef0``.

        Returns:
            A ``ConfigurationSpace`` in which ``degree`` is conditional on
            ``kernel == 'poly'``, ``coef0`` on the available subset of
            ``('sigmoid', 'poly')`` and ``gamma`` on the available subset of
            ``('rbf', 'poly')``.
        """
        if dataset_properties is not None:
            n_features = len(dataset_properties['numerical_columns'])
            # floor() of a small product can be 0, which is not a usable number
            # of components; keep the lower bound at 1 or above.
            n_lower = max(1, floor(n_components[0][0] * n_features))
            # Keep the range non-degenerate (upper strictly above lower) so a
            # valid integer range exists even for very few features.
            n_upper = max(n_lower + 1, ceil(n_components[0][1] * n_features))
            # The default has to lie inside the (possibly clamped) range.
            n_default = min(max(ceil(n_components[1] * n_features), n_lower), n_upper)
        else:
            n_lower, n_upper, n_default = 10, 2000, 100

        n_components_hp = UniformIntegerHyperparameter(
            "n_components", lower=n_lower, upper=n_upper, default_value=n_default)
        kernel_hp = CategoricalHyperparameter('kernel', choices=kernel[0], default_value=kernel[1])
        gamma_hp = UniformFloatHyperparameter(
            "gamma",
            lower=gamma[0][0], upper=gamma[0][1],
            log=gamma[2],
            default_value=gamma[1],
        )
        coef0_hp = UniformFloatHyperparameter(
            "coef0", lower=coef0[0][0], upper=coef0[0][1], default_value=coef0[1])

        cs = ConfigurationSpace()
        cs.add_hyperparameters([n_components_hp, kernel_hp, gamma_hp, coef0_hp])

        available_kernels = kernel_hp.choices

        if "poly" in available_kernels:
            degree_hp = UniformIntegerHyperparameter(
                'degree', lower=degree[0][0], upper=degree[0][1], default_value=degree[1])
            cs.add_hyperparameters([degree_hp])
            cs.add_conditions([EqualsCondition(degree_hp, kernel_hp, "poly")])

        # Restrict each condition to the kernels that are actually offered.
        coef0_kernels = [name for name in ("sigmoid", "poly") if name in available_kernels]
        # NOTE(review): sklearn documents gamma as also used by the sigmoid
        # kernel; it is kept inactive for sigmoid here to preserve the
        # existing search space -- confirm whether that is intended.
        gamma_kernels = [name for name in ("rbf", "poly") if name in available_kernels]

        cs.add_conditions([
            InCondition(coef0_hp, kernel_hp, coef0_kernels),
            InCondition(gamma_hp, kernel_hp, gamma_kernels),
        ])
        return cs

    @staticmethod
    def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
        """Return the static description of this component.

        ``dataset_properties`` is accepted but not used.
        """
        return {'shortname': 'KernelPCA',
                'name': 'Kernel Principal Component Analysis',
                'handles_sparse': True
                }
0 commit comments