From 82589346074e493cd3dce22da926bd85d1d4fcb8 Mon Sep 17 00:00:00 2001
From: Alex Athorne
Date: Wed, 12 Jul 2023 17:05:53 +0100
Subject: [PATCH 1/3] Return data from the backend fit method

---
 alibi_detect/od/_gmm.py                      |  2 +-
 alibi_detect/od/_svm.py                      |  2 +-
 alibi_detect/od/pytorch/base.py              | 10 +++++++-
 alibi_detect/od/pytorch/gmm.py               |  2 +-
 alibi_detect/od/pytorch/svm.py               |  2 +-
 alibi_detect/od/tests/test__gmm/test__gmm.py | 18 ++++++++++++-
 alibi_detect/od/tests/test__svm/test__svm.py | 27 +++++++++++++++++++-
 7 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/alibi_detect/od/_gmm.py b/alibi_detect/od/_gmm.py
index a4d494c82..87e54c3d1 100644
--- a/alibi_detect/od/_gmm.py
+++ b/alibi_detect/od/_gmm.py
@@ -126,7 +126,7 @@ def fit(
         verbose
             Verbosity level used to fit the detector. Used for both ``'sklearn'`` and ``'pytorch'`` backends.
             Defaults to ``0``.
         """
-        self.backend.fit(
+        return self.backend.fit(
             self.backend._to_backend_dtype(x_ref),
             **self.backend.format_fit_kwargs(locals())
         )
diff --git a/alibi_detect/od/_svm.py b/alibi_detect/od/_svm.py
index 1696a067c..6213efa6d 100644
--- a/alibi_detect/od/_svm.py
+++ b/alibi_detect/od/_svm.py
@@ -143,7 +143,7 @@ def fit(
         verbose
             Verbosity level during training. ``0`` is silent, ``1`` prints fit status. If using `bgd`, fit displays a
             progress bar. Otherwise, if using `sgd` then we output the Sklearn `SGDOneClassSVM.fit()` logs.
         """
-        self.backend.fit(
+        return self.backend.fit(
             self.backend._to_backend_dtype(x_ref),
             **self.backend.format_fit_kwargs(locals())
         )
diff --git a/alibi_detect/od/pytorch/base.py b/alibi_detect/od/pytorch/base.py
index 747f6c995..ac521fb6b 100644
--- a/alibi_detect/od/pytorch/base.py
+++ b/alibi_detect/od/pytorch/base.py
@@ -31,6 +31,14 @@ def to_frontend_dtype(self):
         return result
 
 
+def _tensor_to_frontend_dtype(x: Union[torch.Tensor, np.ndarray, float]) -> Union[np.ndarray, float]:
+    if isinstance(x, torch.Tensor):
+        x = x.cpu().detach().numpy()
+    if isinstance(x, np.ndarray) and x.ndim == 0:
+        x = x.item()
+    return x
+
+
 def _raise_type_error(x):
     raise TypeError(f'x is type={type(x)} but must be one of TorchOutlierDetectorOutput or a torch Tensor')
 
@@ -52,7 +60,7 @@ def to_frontend_dtype(x: Union[torch.Tensor, TorchOutlierDetectorOutput]) -> Uni
     return {
         'TorchOutlierDetectorOutput': lambda x: x.to_frontend_dtype(),
-        'Tensor': lambda x: x.cpu().detach().numpy()
+        'Tensor': _tensor_to_frontend_dtype
     }.get(
         x.__class__.__name__,
         _raise_type_error
diff --git a/alibi_detect/od/pytorch/gmm.py b/alibi_detect/od/pytorch/gmm.py
index ba4f3e47a..3e927ea29 100644
--- a/alibi_detect/od/pytorch/gmm.py
+++ b/alibi_detect/od/pytorch/gmm.py
@@ -132,7 +132,7 @@ def fit(  # type: ignore[override]
         self._set_fitted()
         return {
             'converged': converged,
-            'lower_bound': min_loss,
+            'lower_bound': self._to_frontend_dtype(min_loss),
             'n_epochs': epoch
         }
 
diff --git a/alibi_detect/od/pytorch/svm.py b/alibi_detect/od/pytorch/svm.py
index 094499776..3701fa584 100644
--- a/alibi_detect/od/pytorch/svm.py
+++ b/alibi_detect/od/pytorch/svm.py
@@ -338,7 +338,7 @@ def fit(  # type: ignore[override]
         self._set_fitted()
         return {
             'converged': converged,
-            'lower_bound': min_loss,
+            'lower_bound': self._to_frontend_dtype(min_loss),
             'n_iter': iter
         }
 
diff --git a/alibi_detect/od/tests/test__gmm/test__gmm.py b/alibi_detect/od/tests/test__gmm/test__gmm.py
index c210a134b..f3e6bea51 100644
--- a/alibi_detect/od/tests/test__gmm/test__gmm.py
+++ b/alibi_detect/od/tests/test__gmm/test__gmm.py
@@ -87,7 +87,7 @@ def test_gmm_integration(backend):
     gmm_detector = GMM(n_components=8, backend=backend)
     X_ref, _ = make_moons(1001, shuffle=True, noise=0.05, random_state=None)
     X_ref, x_inlier = X_ref[0:1000], X_ref[1000][None]
-    gmm_detector.fit(X_ref)
+    fit_logs = gmm_detector.fit(X_ref)
     gmm_detector.infer_threshold(X_ref, 0.1)
     result = gmm_detector.predict(x_inlier)
     result = result['data']['is_outlier'][0]
@@ -117,3 +117,19 @@ def test_gmm_torchscript(tmp_path):
     ts_gmm = torch.load(tmp_path / 'gmm.pt')
     y = ts_gmm(x)
     assert torch.all(y == torch.tensor([False, True]))
+
+
+@pytest.mark.parametrize('backend', ['pytorch', 'sklearn'])
+def test_gmm_fit(backend):
+    """Test GMM detector fit method.
+
+    Tests that the detector checks for convergence and stops early if it does.
+    """
+    gmm = GMM(n_components=1, backend=backend)
+    mean = [8, 8]
+    cov = [[2., 0.], [0., 1.]]
+    x_ref = torch.tensor(np.random.multivariate_normal(mean, cov, 1000))
+    fit_results = gmm.fit(x_ref, tol=0.01, batch_size=32)
+    assert isinstance(fit_results['lower_bound'], float)
+    assert fit_results['converged']
+    assert fit_results['lower_bound'] < 1
diff --git a/alibi_detect/od/tests/test__svm/test__svm.py b/alibi_detect/od/tests/test__svm/test__svm.py
index eb5e991bb..ce24a72f8 100644
--- a/alibi_detect/od/tests/test__svm/test__svm.py
+++ b/alibi_detect/od/tests/test__svm/test__svm.py
@@ -109,7 +109,7 @@ def test_fitted_svm_score(optimization):
         nu=0.1
     )
     x_ref = np.random.randn(100, 2)
-    svm_detector.fit(x_ref)
+    fit_logs = svm_detector.fit(x_ref)
     x = np.array([[0, 10], [0.1, 0]])
     scores = svm_detector.score(x)
 
@@ -207,3 +207,28 @@ def test_svm_torchscript(tmp_path):
     ts_svm = torch.load(tmp_path / 'svm.pt')
     y = ts_svm(x)
     assert torch.all(y == torch.tensor([False, True]))
+
+
+@pytest.mark.parametrize('optimization', ['sgd', 'bgd'])
+def test_svm_fit(optimization):
+    """Test SVM detector fit method.
+
+    Tests that the pytorch detector checks for convergence and stops early if it does.
+ """ + kernel = GaussianRBF(torch.tensor(1.)) + svm = SVM( + n_components=10, + kernel=kernel, + nu=0.01, + optimization=optimization, + ) + mean = [8, 8] + cov = [[2., 0.], [0., 1.]] + x_ref = torch.tensor(np.random.multivariate_normal(mean, cov, 1000)) + fit_results = svm.fit(x_ref, tol=0.01) + assert fit_results['converged'] + assert fit_results['n_iter'] < 100 + assert fit_results.get('lower_bound', 0) < 1 + # 'sgd' optimization does not return lower bound + if optimization == 'bgd': + assert isinstance(fit_results['lower_bound'], float) From 1ce6e2321d71ed7898e5a3494780704ebb59ea1a Mon Sep 17 00:00:00 2001 From: Alex Athorne Date: Wed, 12 Jul 2023 17:46:18 +0100 Subject: [PATCH 2/3] Fix minor issues --- alibi_detect/od/pytorch/base.py | 2 +- alibi_detect/od/tests/test__gmm/test__gmm.py | 2 +- alibi_detect/od/tests/test__svm/test__svm.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/alibi_detect/od/pytorch/base.py b/alibi_detect/od/pytorch/base.py index ac521fb6b..ccb1543ed 100644 --- a/alibi_detect/od/pytorch/base.py +++ b/alibi_detect/od/pytorch/base.py @@ -36,7 +36,7 @@ def _tensor_to_frontend_dtype(x: Union[torch.Tensor, np.ndarray, float]) -> Unio x = x.cpu().detach().numpy() if isinstance(x, np.ndarray) and x.ndim == 0: x = x.item() - return x + return x # type: ignore[return-value] def _raise_type_error(x): diff --git a/alibi_detect/od/tests/test__gmm/test__gmm.py b/alibi_detect/od/tests/test__gmm/test__gmm.py index f3e6bea51..c4d724557 100644 --- a/alibi_detect/od/tests/test__gmm/test__gmm.py +++ b/alibi_detect/od/tests/test__gmm/test__gmm.py @@ -87,7 +87,7 @@ def test_gmm_integration(backend): gmm_detector = GMM(n_components=8, backend=backend) X_ref, _ = make_moons(1001, shuffle=True, noise=0.05, random_state=None) X_ref, x_inlier = X_ref[0:1000], X_ref[1000][None] - fit_logs = gmm_detector.fit(X_ref) + gmm_detector.fit(X_ref) gmm_detector.infer_threshold(X_ref, 0.1) result = gmm_detector.predict(x_inlier) result = result['data']['is_outlier'][0] diff --git a/alibi_detect/od/tests/test__svm/test__svm.py b/alibi_detect/od/tests/test__svm/test__svm.py index ce24a72f8..f76cc4e58 100644 --- a/alibi_detect/od/tests/test__svm/test__svm.py +++ b/alibi_detect/od/tests/test__svm/test__svm.py @@ -109,7 +109,7 @@ def test_fitted_svm_score(optimization): nu=0.1 ) x_ref = np.random.randn(100, 2) - fit_logs = svm_detector.fit(x_ref) + svm_detector.fit(x_ref) x = np.array([[0, 10], [0.1, 0]]) scores = svm_detector.score(x) From 05ed926644a403304273caa93f56459dd6d160d3 Mon Sep 17 00:00:00 2001 From: Alex Athorne Date: Thu, 13 Jul 2023 10:04:58 +0100 Subject: [PATCH 3/3] Update docstrings for gmm and svm fit methods --- alibi_detect/od/_gmm.py | 8 ++++++++ alibi_detect/od/_svm.py | 7 +++++++ alibi_detect/od/pytorch/gmm.py | 4 ++-- alibi_detect/od/pytorch/svm.py | 2 +- 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/alibi_detect/od/_gmm.py b/alibi_detect/od/_gmm.py index 87e54c3d1..be7ffa950 100644 --- a/alibi_detect/od/_gmm.py +++ b/alibi_detect/od/_gmm.py @@ -125,6 +125,14 @@ def fit( Defaults to ``'kmeans'``. verbose Verbosity level used to fit the detector. Used for both ``'sklearn'`` and ``'pytorch'`` backends. Defaults to ``0``. + + Returns + ------- + Dictionary with fit results. The dictionary contains the following keys depending on the backend used: + - converged: bool indicating whether EM algorithm converged. + - n_iter: number of EM iterations performed. Only returned if `backend` is ``'sklearn'``. 
+            - n_epochs: number of gradient descent iterations performed. Only returned if `backend` is ``'pytorch'``.
+            - lower_bound: log-likelihood lower bound.
         """
         return self.backend.fit(
             self.backend._to_backend_dtype(x_ref),
             **self.backend.format_fit_kwargs(locals())
         )
diff --git a/alibi_detect/od/_svm.py b/alibi_detect/od/_svm.py
index 6213efa6d..53a8066c7 100644
--- a/alibi_detect/od/_svm.py
+++ b/alibi_detect/od/_svm.py
@@ -142,6 +142,13 @@ def fit(
         verbose
             Verbosity level during training. ``0`` is silent, ``1`` prints fit status. If using `bgd`, fit displays a
             progress bar. Otherwise, if using `sgd` then we output the Sklearn `SGDOneClassSVM.fit()` logs.
+
+        Returns
+        -------
+        Dictionary with fit results. The dictionary contains the following keys depending on the optimization used:
+            - converged: `bool` indicating whether training converged.
+            - n_iter: number of iterations performed.
+            - lower_bound: loss lower bound. Only returned for the `bgd` optimization.
         """
         return self.backend.fit(
             self.backend._to_backend_dtype(x_ref),
             **self.backend.format_fit_kwargs(locals())
         )
diff --git a/alibi_detect/od/pytorch/gmm.py b/alibi_detect/od/pytorch/gmm.py
index 3e927ea29..474311b9a 100644
--- a/alibi_detect/od/pytorch/gmm.py
+++ b/alibi_detect/od/pytorch/gmm.py
@@ -75,8 +75,8 @@ def fit(  # type: ignore[override]
         Returns
         -------
         Dictionary with fit results. The dictionary contains the following keys:
-            - converged: bool indicating whether EM algorithm converged.
-            - n_iter: number of EM iterations performed.
+            - converged: bool indicating whether training converged.
+            - n_epochs: number of gradient descent iterations performed.
             - lower_bound: log-likelihood lower bound.
         """
         self.model = GMMModel(self.n_components, x_ref.shape[-1]).to(self.device)
diff --git a/alibi_detect/od/pytorch/svm.py b/alibi_detect/od/pytorch/svm.py
index 3701fa584..081896beb 100644
--- a/alibi_detect/od/pytorch/svm.py
+++ b/alibi_detect/od/pytorch/svm.py
@@ -272,7 +272,7 @@ def fit(  # type: ignore[override]
         Returns
         -------
         Dictionary with fit results. The dictionary contains the following keys:
-            - converged: bool indicating whether training converged.
+            - converged: `bool` indicating whether training converged.
             - n_iter: number of iterations performed.
             - lower_bound: loss lower bound.
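
Usage note (illustrative sketch, not part of the patches above): with `fit` now returning the backend's
fit data, a caller can check convergence before trusting the detector. The constructor and fit arguments
below mirror the new tests and are assumptions for the example, not recommended settings.

    import numpy as np
    from alibi_detect.od import GMM

    # Reference data from a single Gaussian, so a one-component GMM should converge quickly.
    x_ref = np.random.multivariate_normal([8, 8], [[2., 0.], [0., 1.]], 1000)

    gmm = GMM(n_components=1, backend='pytorch')
    fit_results = gmm.fit(x_ref, tol=0.01, batch_size=32)

    # Keys per the updated docstrings: 'converged', 'n_epochs' and 'lower_bound' for the pytorch backend.
    if not fit_results['converged']:
        print(f"GMM stopped after {fit_results['n_epochs']} epochs without converging "
              f"(lower bound {fit_results['lower_bound']:.3f}).")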