From 9443dc52215eb7fa8ba8d9b8bc620a3cfaa4ff29 Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Fri, 22 Sep 2023 07:51:18 +0000 Subject: [PATCH 01/13] UPD: add new configuration with pre-commit/flask8 --- .flake8 | 7 +++++++ .pre-commit-config.yaml | 25 +++++++++++++++++++++++++ Makefile | 2 +- environment.ci.yml | 10 ++++++---- environment.dev.yml | 17 +++-------------- environment.doc.yml | 7 +++---- mypy.ini | 9 ++++++++- requirements.ci.txt | 8 +++++--- requirements.dev.txt | 13 ++----------- requirements.doc.txt | 3 --- setup.py | 2 +- 11 files changed, 61 insertions(+), 42 deletions(-) create mode 100644 .flake8 create mode 100644 .pre-commit-config.yaml diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..1446331f --- /dev/null +++ b/.flake8 @@ -0,0 +1,7 @@ +[flake8] +exclude = .git, .github, __pycache__ , .vscode, build +max-line-length = 99 +ignore = E302,E305,W503,E203,E731,E402,E266,E712,F401,F821 +indent-size = 4 +per-file-ignores = + */__init__.py:F401 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..52af7d73 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,25 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.3.0 + hooks: + - id: check-yaml + exclude: (docs/) + - id: end-of-file-fixer + exclude: (docs/) + - id: trailing-whitespace + exclude: (docs/) + - repo: https://github.com/psf/black + rev: 22.8.0 + hooks: + - id: black + args: + - "-l 99" + # Flake8 + - repo: https://github.com/PyCQA/flake8 + rev: 4.0.1 + hooks: + - id: flake8 + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.1.1 + hooks: + - id: mypy \ No newline at end of file diff --git a/Makefile b/Makefile index c6e15294..6623ffbc 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ .PHONY: tests doc build lint: - flake8 . --exclude=doc + flake8 . 
--exclude=doc,build type-check: mypy mapie diff --git a/environment.ci.yml b/environment.ci.yml index 07f31c0a..4a0b2b98 100644 --- a/environment.ci.yml +++ b/environment.ci.yml @@ -4,9 +4,11 @@ channels: - conda-forge dependencies: - codecov - - flake8 + - flake8==4.0.1 - mypy - - pandas + - pytest - pytest-cov - - scikit-learn - - typed-ast + - pytest-mock + - twine + - wheel + - pandas diff --git a/environment.dev.yml b/environment.dev.yml index c86629ef..5485a0e7 100644 --- a/environment.dev.yml +++ b/environment.dev.yml @@ -4,19 +4,8 @@ channels: - conda-forge dependencies: - bump2version=1.0.1 - - flake8=4.0.1 - ipykernel=6.9.0 - jupyter=1.0.0 - - mypy=0.941 - - numpydoc=1.1.0 - - numpy=1.22.3 - - pandas=1.3.5 - - pytest=6.2.5 - - pytest-cov=3.0.0 - - python=3.10 - - scikit-learn - - sphinx=4.3.2 - - sphinx-gallery=0.10.1 - - sphinx_rtd_theme=1.0.0 - - twine=3.7.1 - - wheel=0.37.0 + - pandas + - matplotlib + - lightgbm diff --git a/environment.doc.yml b/environment.doc.yml index f6a0e6ce..a38494e9 100644 --- a/environment.doc.yml +++ b/environment.doc.yml @@ -3,12 +3,11 @@ channels: - defaults - conda-forge dependencies: - - lightgbm=3.2.1 - numpydoc=1.1.0 - - pandas=1.3.5 - - python=3.10 - - scikit-learn - sphinx=4.3.2 - sphinx-gallery=0.10.1 - sphinx_rtd_theme=1.0.0 - typing_extensions=4.0.1 + - pandas + - matplotlib + - lightgbm \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index 358fc124..456cac42 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,5 +1,5 @@ [mypy] -python_version = 3.9 +python_version = 3.10 ignore_missing_imports = True [mypy-sklearn.*] @@ -7,3 +7,10 @@ ignore_errors = True [mypy-doc.*] ignore_errors = True + +[mypy-matplotlib.*] +ignore_missing_imports = True + +[mypy-numpy.*] +ignore_missing_imports = True +ignore_errors = True diff --git a/requirements.ci.txt b/requirements.ci.txt index 587a04f8..23aaac54 100644 --- a/requirements.ci.txt +++ b/requirements.ci.txt @@ -1,7 +1,9 @@ codecov -flake8 +flake8==4.0.1 mypy -pandas pytest pytest-cov -typed-ast +pytest-mock +pre-commit +twine +wheel diff --git a/requirements.dev.txt b/requirements.dev.txt index a5e94a60..8d93c882 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -1,16 +1,7 @@ bump2version==1.0.1 -flake8==4.0.1 ipykernel==6.9.0 jupyter==1.0.0 -mypy==0.941 -numpy==1.22.3 -numpydoc==1.1.0 -pandas==1.3.5 -pytest==6.2.5 -pytest-cov==3.0.0 -scikit-learn -sphinx==4.3.2 -sphinx-gallery==0.10.1 -sphinx_rtd_theme==1.0.0 +pandas +matplotlib twine==3.7.1 wheel==0.38.1 \ No newline at end of file diff --git a/requirements.doc.txt b/requirements.doc.txt index acff47a8..c6e16c35 100644 --- a/requirements.doc.txt +++ b/requirements.doc.txt @@ -1,7 +1,4 @@ -lightgbm==3.2.1 -matplotlib==3.5.1 numpydoc==1.1.0 -pandas==1.3.5 sphinx==4.3.2 sphinx-gallery==0.10.1 sphinx_rtd_theme==1.0.0 diff --git a/setup.py b/setup.py index 4668b445..84883651 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ ) PYTHON_REQUIRES = ">=3.7" PACKAGES = find_packages() -INSTALL_REQUIRES = ["scikit-learn", "scipy", "numpy>=1.21", "packaging"] +INSTALL_REQUIRES = ["scikit-learn", "packaging"] CLASSIFIERS = [ "Intended Audience :: Science/Research", "Intended Audience :: Developers", From 7dc8bf4a131a06a0e730f6f209406ef66c197b90 Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Fri, 22 Sep 2023 08:10:40 +0000 Subject: [PATCH 02/13] UPD: pre-commit changes --- .coveragerc | 2 +- .github/ISSUE_TEMPLATE/config.yml | 2 +- .github/PULL_REQUEST_TEMPLATE.md | 2 +- 
.pre-commit-config.yaml | 2 +- .readthedocs.yml | 2 +- MANIFEST.in | 2 +- Makefile | 2 +- README.rst | 4 +- doc/api.rst | 2 +- doc/conf.py | 16 +- doc/images/comp-methods.csv | 2 +- doc/index.rst | 2 +- doc/notebooks_multilabel_classification.rst | 4 +- doc/notebooks_regression.rst | 2 - doc/quick_start.rst | 6 +- ...ical_description_binary_classification.rst | 8 +- doc/theoretical_description_calibration.rst | 20 +- ...theoretical_description_classification.rst | 78 +- ...oretical_description_conformity_scores.rst | 2 +- ..._description_multilabel_classification.rst | 6 +- doc/theoretical_description_regression.rst | 168 ++-- environment.doc.yml | 2 +- examples/README.rst | 2 +- examples/calibration/1-quickstart/README.rst | 2 +- .../plot_calibration_hypothesis_testing.py | 33 +- .../2-advanced-analysis/README.rst | 2 +- ...plot_asymptotic_convergence_of_p_values.py | 25 +- examples/calibration/README.rst | 2 +- .../classification/1-quickstart/README.rst | 2 +- .../plot_comp_methods_on_2d_dataset.py | 30 +- .../2-advanced-analysis/README.rst | 2 +- .../3-scientific-articles/README.rst | 2 +- .../plot_sadinle2019_example.py | 13 +- .../classification/4-tutorials/README.rst | 2 +- .../4-tutorials/plot_crossconformal.py | 174 ++-- ...lot_main-tutorial-binary-classification.py | 82 +- .../plot_main-tutorial-classification.py | 94 +- examples/classification/README.rst | 2 +- .../1-quickstart/README.rst | 2 +- ...plot_tutorial_multilabel_classification.py | 109 +-- examples/multilabel_classification/README.rst | 2 +- examples/regression/1-quickstart/README.rst | 2 +- .../plot_compare_conformity_scores.py | 24 +- .../plot_heteroscedastic_1d_data.py | 27 +- .../plot_homoscedastic_1d_data.py | 27 +- .../regression/1-quickstart/plot_prefit.py | 48 +- .../1-quickstart/plot_timeseries_example.py | 16 +- .../regression/1-quickstart/plot_toy_model.py | 3 +- .../regression/2-advanced-analysis/README.rst | 2 +- .../plot_both_uncertainties.py | 16 +- .../plot_conditional_coverage.py | 102 +- .../plot_conformal_predictive_distribution.py | 40 +- .../2-advanced-analysis/plot_nested-cv.py | 28 +- .../plot_timeseries_enbpi.py | 36 +- .../3-scientific-articles/README.rst | 2 +- .../plot_barber2020_simulations.py | 20 +- .../plot_kim2020_simulations.py | 25 +- examples/regression/4-tutorials/README.rst | 2 +- .../4-tutorials/plot_cqr_tutorial.py | 159 ++-- .../plot_main-tutorial-regression.py | 224 ++--- ...lot_residual-normalised-score-tutorial.py} | 89 +- .../4-tutorials/plot_ts-tutorial.py | 87 +- examples/regression/README.rst | 2 +- mapie/__init__.py | 7 +- mapie/_compatibility.py | 20 +- mapie/calibration.py | 68 +- mapie/classification.py | 500 +++------- mapie/conformity_scores/__init__.py | 10 +- mapie/conformity_scores/conformity_scores.py | 51 +- .../residual_conformity_scores.py | 89 +- mapie/control_risk/crc_rcps.py | 100 +- mapie/control_risk/ltt.py | 14 +- mapie/control_risk/p_values.py | 45 +- mapie/control_risk/risks.py | 46 +- mapie/estimator/estimator.py | 46 +- mapie/estimator/interface.py | 10 +- mapie/metrics.py | 226 ++--- mapie/multi_label_classification.py | 140 +-- mapie/regression/__init__.py | 6 +- mapie/regression/quantile_regression.py | 163 +--- mapie/regression/regression.py | 99 +- mapie/regression/time_series_regression.py | 31 +- mapie/subsample.py | 36 +- mapie/tests/test_calibration.py | 173 +--- mapie/tests/test_classification.py | 876 +++++------------- mapie/tests/test_common.py | 37 +- mapie/tests/test_conformity_scores.py | 199 ++-- mapie/tests/test_control_risk.py | 59 
+- mapie/tests/test_metrics.py | 305 +++--- .../tests/test_multi_label_classification.py | 384 +++----- mapie/tests/test_quantile_regression.py | 322 ++----- mapie/tests/test_regression.py | 138 +-- mapie/tests/test_subsample.py | 8 +- mapie/tests/test_time_series_regression.py | 66 +- mapie/tests/test_utils.py | 134 ++- mapie/utils.py | 196 ++-- notebooks/Makefile | 2 +- notebooks/classification/Cifar10.md | 198 ++-- .../classification/tutorial_classification.md | 14 +- notebooks/regression/exoplanets.md | 4 +- notebooks/regression/ts-changepoint.md | 28 +- notebooks/regression/tutorial_regression.md | 100 +- requirements.dev.txt | 2 +- requirements.doc.txt | 2 +- setup.py | 17 +- 105 files changed, 2261 insertions(+), 4609 deletions(-) rename examples/regression/4-tutorials/{plot_ResidualNormalisedScore_tutorial.py => plot_residual-normalised-score-tutorial.py} (81%) diff --git a/.coveragerc b/.coveragerc index 138dc486..1aacd2dd 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,4 +2,4 @@ omit = mapie/_compatibility.py [report] -omit = mapie/_compatibility.py \ No newline at end of file +omit = mapie/_compatibility.py diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index ec4bb386..3ba13e0c 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1 +1 @@ -blank_issues_enabled: false \ No newline at end of file +blank_issues_enabled: false diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index d58c8e77..b851bbc4 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -28,4 +28,4 @@ Please describe the tests that you ran to verify your changes. Provide instructi - [ ] Typing passes successfully : `make type-check` - [ ] Unit tests pass successfully : `make tests` - [ ] Coverage is 100% : `make coverage` -- [ ] Documentation builds successfully : `make doc` \ No newline at end of file +- [ ] Documentation builds successfully : `make doc` diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 52af7d73..615d247f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,4 +22,4 @@ repos: - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.1.1 hooks: - - id: mypy \ No newline at end of file + - id: mypy diff --git a/.readthedocs.yml b/.readthedocs.yml index b7ba6045..46e88071 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -12,7 +12,7 @@ python: conda: environment: environment.doc.yml - + sphinx: builder: html configuration: doc/conf.py diff --git a/MANIFEST.in b/MANIFEST.in index 6125ce68..9b497712 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ include LICENSE include AUTHORS.rst recursive-exclude doc * -recursive-include examples *.py \ No newline at end of file +recursive-include examples *.py diff --git a/Makefile b/Makefile index 6623ffbc..34e1d0bd 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ .PHONY: tests doc build -lint: +lint: flake8 . 
--exclude=doc,build type-check: diff --git a/README.rst b/README.rst index 3d6478c5..fd25ff93 100644 --- a/README.rst +++ b/README.rst @@ -68,7 +68,7 @@ Here's a quick instantiation of MAPIE models for regression and classification p Implemented methods in **MAPIE** respect three fundamental pillars: -- They are **model and use case agnostic**, +- They are **model and use case agnostic**, - They possess **theoretical guarantees** under minimal assumptions on the data and the model, - They are based on **peer-reviewed algorithms** and respect programming standards. @@ -168,7 +168,7 @@ For more information on the contribution process, please go `here 1-\alpha`,:math:`K`,:math:`K \times n_{test}` **Conformalized quantile regressor**,:math:`\geq 1-\alpha`,:math:`\gtrsim 1-\alpha`,:math:`3`,:math:`3 \times n_{test}` -**EnbPI**,:math:`\geq 1-\alpha` (asymptotic),:math:`\gtrsim 1-\alpha`,:math:`K`,:math:`K \times n_{test}` \ No newline at end of file +**EnbPI**,:math:`\geq 1-\alpha` (asymptotic),:math:`\gtrsim 1-\alpha`,:math:`K`,:math:`K \times n_{test}` diff --git a/doc/index.rst b/doc/index.rst index ef5576bb..7be63c1e 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,4 +1,4 @@ -.. include:: ../README.rst +.. include:: ../README.rst .. toctree:: :maxdepth: 2 diff --git a/doc/notebooks_multilabel_classification.rst b/doc/notebooks_multilabel_classification.rst index 91380994..92831a5d 100644 --- a/doc/notebooks_multilabel_classification.rst +++ b/doc/notebooks_multilabel_classification.rst @@ -2,11 +2,11 @@ Multi-label Classification notebooks =========================== The following examples present advanced analyses -on multi-label classification problems with different +on multi-label classification problems with different methods proposed in MAPIE. 1. Overview of Recall Control for Multi-Label Classification : `notebook `_ ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 2. Overview of Precision Control for Multi-Label Classification : `notebook `_ ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- \ No newline at end of file +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ diff --git a/doc/notebooks_regression.rst b/doc/notebooks_regression.rst index 8c303894..b02e7d6d 100755 --- a/doc/notebooks_regression.rst +++ b/doc/notebooks_regression.rst @@ -14,5 +14,3 @@ This section lists a series of Jupyter notebooks hosted on the MAPIE Github repo 3. Estimating prediction intervals for time series forecast with EnbPI : `notebook `_ ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ - - diff --git a/doc/quick_start.rst b/doc/quick_start.rst index 31e2efa9..bc983f77 100644 --- a/doc/quick_start.rst +++ b/doc/quick_start.rst @@ -35,7 +35,7 @@ To install directly from the github repository : 2. Run MapieRegressor --------------------- -Let us start with a basic regression problem. +Let us start with a basic regression problem. Here, we generate one-dimensional noisy data that we fit with a linear model. .. 
code:: python @@ -71,12 +71,12 @@ for each desired alpha value. You can compute the coverage of your prediction intervals. .. code:: python - + from mapie.metrics import regression_coverage_score_v2 coverage_scores = regression_coverage_score_v2(y_test, y_pis) -The estimated prediction intervals can then be plotted as follows. +The estimated prediction intervals can then be plotted as follows. .. code:: python diff --git a/doc/theoretical_description_binary_classification.rst b/doc/theoretical_description_binary_classification.rst index c9b29b2f..6709b5ce 100644 --- a/doc/theoretical_description_binary_classification.rst +++ b/doc/theoretical_description_binary_classification.rst @@ -9,7 +9,7 @@ Theoretical Description There are mainly three different ways to handle uncertainty quantification in binary classification: calibration (see :doc:`theoretical_description_calibration`), confidence interval (CI) for the probability :math:`P(Y \vert \hat{\mu}(X))` and prediction sets (see :doc:`theoretical_description_classification`). -These 3 notions are tightly related for score-based classifier, as it is shown in [1]. +These 3 notions are tightly related for score-based classifier, as it is shown in [1]. Prediction sets can be computed in the same way for multiclass and binary classification with :class:`~mapie.calibration.MapieClassifier`, and there are the same theoretical guarantees. @@ -40,7 +40,7 @@ Definition 1 (Prediction Set (PS) w.r.t :math:`f`) [1]. Define the set of all subsets of :math:`\mathcal{Y}`, :math:`L = \{\{0\}, \{1\}, \{0, 1\}, \emptyset\}`. A function :math:`S:[0,1]\to\mathcal{L}` is said to be :math:`(1-\alpha)`-PS with respect to :math:`\hat{\mu}` if: -.. math:: +.. math:: P(Y\in S(\hat{\mu}(X))) \geq 1 - \alpha PSs are typically studied for larger output sets, such as :math:`\mathcal{Y}_{regression}=\mathbb{R}` or @@ -57,7 +57,7 @@ Definition 2 (Confidence Interval (CI) w.r.t :math:`\hat{\mu}`) [1]. Let :math:`I` denote the set of all subintervals of :math:`[0,1]`. A function :math:`C:[0,1]\to\mathcal{I}` is said to be :math:`(1-\alpha)`-CI with respect to :math:`\hat{\mu}` if: -.. math:: +.. math:: P(\mathbb{E}[Y|\hat{\mu}(X)]\in C(\hat{\mu}(X))) \geq 1 - \alpha In the framework of conformal prediction, the Venn predictor has this property. @@ -74,7 +74,7 @@ Definition 3 (Approximate calibration) [1]. The predictor :math:`\hat{\mu}:\mathcal{X} \to [0, 1]` is :math:`(\epsilon,\alpha)`-calibrated for some :math:`\epsilon,\alpha\in[0, 1]` if with probability at least :math:`1-\alpha`: -.. math:: +.. math:: |\mathbb{E}[Y|\hat{\mu}(X)] - \hat{\mu}(X)| \leq \epsilon See :class:`~sklearn.calibration.CalibratedClassifierCV` or :class:`~mapie.calibration.MapieCalibrator` diff --git a/doc/theoretical_description_calibration.rst b/doc/theoretical_description_calibration.rst index f1b64079..90101611 100644 --- a/doc/theoretical_description_calibration.rst +++ b/doc/theoretical_description_calibration.rst @@ -25,7 +25,7 @@ true probability compared to the original output. Firstly, we introduce binary calibration, we denote the :math:`(h(X), y)` pair as the score and ground truth for the object. Hence, :math:`y` values are in :math:`{0, 1}`. The model is calibrated if for every output :math:`q \in [0, 1]`, we have: -.. math:: +.. math:: Pr(Y = 1 \mid h(X) = q) = q where :math:`h()` is the score predictor. 
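(Aside, not part of this patch: the binary calibration condition above, Pr(Y = 1 | h(X) = q) = q, can be checked empirically by binning the scores and comparing each bin's mean score with its empirical frequency of positives. The sketch below is a minimal illustration assuming plain NumPy arrays ``y_true`` and ``y_score``; it is not MAPIE API.)

.. code:: python

    # Minimal reliability-style check of binary calibration (illustrative only).
    import numpy as np

    def reliability_table(y_true, y_score, n_bins=10):
        bins = np.linspace(0.0, 1.0, n_bins + 1)
        idx = np.digitize(y_score, bins[1:-1])  # bin index for each score
        rows = []
        for b in range(n_bins):
            mask = idx == b
            if mask.any():
                # For a calibrated model, the mean score in a bin should match
                # the empirical frequency of Y = 1 in that bin.
                rows.append((y_score[mask].mean(), y_true[mask].mean(), int(mask.sum())))
        return rows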
@@ -46,7 +46,7 @@ highest score and the corresponding class, whereas confidence calibration only c Let :math:`c` be the classifier and :math:`h` be the maximum score from the classifier. The couple :math:`(c, h)` is calibrated according to Top-Label calibration if: -.. math:: +.. math:: Pr(Y = c(X) \mid h(X), c(X)) = h(X) @@ -70,7 +70,7 @@ The ECE is the combination of these two metrics combined together. \text{ECE} = \sum_{m=1}^M \frac{\left| B_m \right|}{n} \left| acc(B_m) - conf(B_m) \right| In simple terms, once all the different bins from the confidence scores have been created, we check the mean accuracy of each bin. -The absolute mean difference between the two is the ECE. Hence, the lower the ECE, the better the calibration was performed. +The absolute mean difference between the two is the ECE. Hence, the lower the ECE, the better the calibration was performed. **Top-Label ECE** @@ -84,7 +84,7 @@ of the accuracy and confidence based on the top label and take the average ECE f Kolmogorov-Smirnov test was derived in [2, 3, 4]. The idea is to consider the cumulative differences between sorted scores :math:`s_i` and their corresponding labels :math:`y_i` and to compare its properties to that of a standard Brownian motion. Let us consider the -cumulative differences on sorted scores: +cumulative differences on sorted scores: .. math:: C_k = \frac{1}{N}\sum_{i=1}^k (s_i - y_i) @@ -94,13 +94,13 @@ We also introduce a typical normalization scale :math:`\sigma`: .. math:: \sigma = \frac{1}{N}\sqrt{\sum_{i=1}^N s_i(1 - s_i)} -Tho Kolmogorov-Smirnov statisitc is then defined as : +Tho Kolmogorov-Smirnov statisitc is then defined as : .. math:: G = \max|C_k|/\sigma It can be shown [2] that, under the null hypothesis of well calibrated scores, this quantity asymptotically (i.e. when N goes to infinity) -converges to the maximum absolute value of a standard Brownian motion over the unit interval :math:`[0, 1]`. [3, 4] also provide closed-form +converges to the maximum absolute value of a standard Brownian motion over the unit interval :math:`[0, 1]`. [3, 4] also provide closed-form formulas for the cumulative distribution function (CDF) of the maximum absolute value of such a standard Brownian motion. So we state the p-value associated to the statistical test of well calibration as: @@ -115,7 +115,7 @@ Kuiper test was derived in [2, 3, 4] and is very similar to Kolmogorov-Smirnov. H = (\max_k|C_k| - \min_k|C_k|)/\sigma It can be shown [2] that, under the null hypothesis of well calibrated scores, this quantity asymptotically (i.e. when N goes to infinity) -converges to the range of a standard Brownian motion over the unit interval :math:`[0, 1]`. [3, 4] also provide closed-form +converges to the range of a standard Brownian motion over the unit interval :math:`[0, 1]`. [3, 4] also provide closed-form formulas for the cumulative distribution function (CDF) of the range of such a standard Brownian motion. So we state the p-value associated to the statistical test of well calibration as: @@ -124,7 +124,7 @@ So we state the p-value associated to the statistical test of well calibration a **Spiegelhalter test** -Spiegelhalter test was derived in [6]. It is basically based on a decomposition of the Brier score: +Spiegelhalter test was derived in [6]. It is basically based on a decomposition of the Brier score: .. math:: B = \frac{1}{N}\sum_{i=1}^N(y_i - s_i)^2 @@ -141,7 +141,7 @@ computed as: .. 
math:: Var(B) = \frac{1}{N^2}\sum_{i=1}^N(1 - 2s_i)^2 s_i(1 - s_i) -So we can build a Z-score as follows: +So we can build a Z-score as follows: .. math:: Z = \frac{B - E(B)}{\sqrt{Var(B)}} = \frac{\sum_{i=1}^N(y_i - s_i)(1 - 2s_i)}{\sqrt{\sum_{i=1}^N(1 - 2s_i)^2 s_i(1 - s_i)}} @@ -183,4 +183,4 @@ September, 1951. [6] Spiegelhalter DJ. Probabilistic prediction in patient management and clinical trials. Statistics in medicine. -1986 Sep;5(5):421-33. \ No newline at end of file +1986 Sep;5(5):421-33. diff --git a/doc/theoretical_description_classification.rst b/doc/theoretical_description_classification.rst index c5fd4806..6e90ff44 100644 --- a/doc/theoretical_description_classification.rst +++ b/doc/theoretical_description_classification.rst @@ -9,7 +9,7 @@ Theoretical Description Three methods for multi-class uncertainty-quantification have been implemented in MAPIE so far : LAC (that stands for Least Ambiguous set-valued Classifier) [1], Adaptive Prediction Sets [2, 3] and Top-K [3]. -The difference between these methods is the way the conformity scores are computed. +The difference between these methods is the way the conformity scores are computed. The figure below illustrates the three methods implemented in MAPIE: .. image:: images/classification_methods.png @@ -17,11 +17,11 @@ The figure below illustrates the three methods implemented in MAPIE: :align: center For a classification problem in a standard independent and identically distributed (i.i.d) case, -our training data :math:`(X, Y) = \{(x_1, y_1), \ldots, (x_n, y_n)\}`` has an unknown distribution :math:`P_{X, Y}`. +our training data :math:`(X, Y) = \{(x_1, y_1), \ldots, (x_n, y_n)\}`` has an unknown distribution :math:`P_{X, Y}`. For any risk level :math:`\alpha` between 0 and 1, the methods implemented in MAPIE allow the user to construct a prediction set :math:`\hat{C}_{n, \alpha}(X_{n+1})` for a new observation :math:`\left( X_{n+1},Y_{n+1} \right)` with a guarantee -on the marginal coverage such that : +on the marginal coverage such that : .. math:: P \{Y_{n+1} \in \hat{C}_{n, \alpha}(X_{n+1}) \} \geq 1 - \alpha @@ -30,26 +30,26 @@ on the marginal coverage such that : In words, for a typical risk level :math:`\alpha` of :math:`10 \%`, we want to construct prediction sets that contain the true observations for at least :math:`90 \%` of the new test data points. Note that the guarantee is possible only on the marginal coverage, and not on the conditional coverage -:math:`P \{Y_{n+1} \in \hat{C}_{n, \alpha}(X_{n+1}) | X_{n+1} = x_{n+1} \}` which depends on the location of the new test point in the distribution. +:math:`P \{Y_{n+1} \in \hat{C}_{n, \alpha}(X_{n+1}) | X_{n+1} = x_{n+1} \}` which depends on the location of the new test point in the distribution. 1. LAC ------ -In the LAC method, the conformity score is defined as as one minus the score of the true label. For each point :math:`i` of the calibration set : +In the LAC method, the conformity score is defined as as one minus the score of the true label. For each point :math:`i` of the calibration set : -.. math:: +.. math:: s_i(X_i, Y_i) = 1 - \hat{\mu}(X_i)_{Y_i} Once the conformity scores :math:`{s_1, ..., s_n}` are estimated for all calibration points, we compute the :math:`(n+1)*(1-\alpha)/n` quantile -:math:`\hat{q}` as follows : +:math:`\hat{q}` as follows : -.. math:: +.. 
math:: \hat{q} = Quantile \left(s_1, ..., s_n ; \frac{\lceil(n+1)(1-\alpha)\rceil}{n}\right) \\ Finally, we construct a prediction set by including all labels with a score higher than the estimated quantile : -.. math:: +.. math:: \hat{C}(X_{test}) = \{y : \hat{\mu}(X_{test})_y \geq 1 - \hat{q}\} @@ -63,20 +63,20 @@ for example at the border between two classes. Introduced in [3], the specificity of the Top-K method is that it will give the same prediction set size for all observations. The conformity score is the rank of the true label, with scores ranked from higher to lower. -The prediction sets are built by taking the :math:`\hat{q}^{th}` higher scores. The procedure is described in the following equations : +The prediction sets are built by taking the :math:`\hat{q}^{th}` higher scores. The procedure is described in the following equations : -.. math:: +.. math:: s_i(X_i, Y_i) = j \quad \text{where} \quad Y_i = \pi_j \quad \text{and} \quad \hat{\mu}(X_i)_{\pi_1} > ... > \hat{\mu}(X_i)_{\pi_j} > ... > \hat{\mu}(X_i)_{\pi_n} -.. math:: +.. math:: \hat{q} = \left \lceil Quantile \left(s_1, ..., s_n ; \frac{\lceil(n+1)(1-\alpha)\rceil}{n}\right) \right\rceil -.. math:: - \hat{C}(X_{test}) = \{\pi_1, ..., \pi_{\hat{q}}\} +.. math:: + \hat{C}(X_{test}) = \{\pi_1, ..., \pi_{\hat{q}}\} -As the other methods, this procedure allows the user to build prediction sets with guarantee on the marginal coverage. +As the other methods, this procedure allows the user to build prediction sets with guarantee on the marginal coverage. 3. Adaptive Prediction Sets (APS) @@ -87,16 +87,16 @@ prediction sets which are by definition non-empty. The conformity scores are computed by summing the ranked scores of each label, from the higher to the lower until reaching the true label of the observation : -.. math:: - s_i(X_i, Y_i) = \sum^k_{j=1} \hat{\mu}(X_i)_{\pi_j} \quad \text{where} \quad Y_i = \pi_k +.. math:: + s_i(X_i, Y_i) = \sum^k_{j=1} \hat{\mu}(X_i)_{\pi_j} \quad \text{where} \quad Y_i = \pi_k The quantile :math:`\hat{q}` is then computed the same way as the LAC method. For the construction of the prediction sets for a new test point, the same procedure of ranked summing is applied until reaching the quantile, -as described in the following equation : +as described in the following equation : -.. math:: +.. math:: \hat{C}(X_{test}) = \{\pi_1, ..., \pi_k\} \quad \text{where} \quad k = \text{inf}\{k : \sum^k_{j=1} \hat{\mu}(X_{test})_{\pi_j} \geq \hat{q}\} @@ -109,11 +109,11 @@ coverage remains close to the target (marginal) coverage. We refer the reader to 4. Regularized Adaptive Prediction Sets (RAPS) ---------------------------------------------- -The RAPS method which stands for Regularized Adaptive Prediction Sets, is an improvement made by Angelopoulos et al. in +The RAPS method which stands for Regularized Adaptive Prediction Sets, is an improvement made by Angelopoulos et al. in [3]. This regularization is able to overcome the very large prediction sets given by the APS method. The conformity scores are computed by summing the regularized ranked scores of each label, from the higher to the lower until reaching the true label of the observation : -.. math:: +.. math:: s_i(X_i, Y_i) = \sum^k_{j=1} \hat{\mu}(X_i)_{\pi_j} + \lambda (k-k_{reg})^+ \quad \text{where} \quad Y_i = \pi_k Where: @@ -124,8 +124,8 @@ Where: - :math:`\lambda` is a regularization parameter whose calculation we will explain next. 
The optimizations of :math:`k_{reg}` and :math:`\lambda` requires an extra data-splitting (by default, 20% of the calibration data). To choose :math:`k_{reg}`, -we simply run the Top-K method over this new split. For the choice of :math:`\lambda`, we follow the guidelines of [3] and try to find the value of -lambda such that it minimizes the size of the prediction sets. A simple grid search if done on different values of :math:`\lambda` (to be consistent +we simply run the Top-K method over this new split. For the choice of :math:`\lambda`, we follow the guidelines of [3] and try to find the value of +lambda such that it minimizes the size of the prediction sets. A simple grid search if done on different values of :math:`\lambda` (to be consistent with Angelopoulos et al., we choose :math:`\lambda \in \{0.001, 0.01, 0.1, 0.2, 0.5 \}`). For the construction of the prediction set for a new test point, the following procedure is applied: @@ -133,7 +133,7 @@ For the construction of the prediction set for a new test point, the following p .. math:: \hat{C}(X_{test}) = \{\pi_1, ..., \pi_k\} \quad \text{where} \quad k = \text{inf}\{k : \sum^k_{j=1} \hat{\mu}(X_{test})_{\pi_j} + \lambda(k-k_{reg})^+ \geq \hat{q}\} -Intuitively, the goal of the method is to penalize the prediction sets whose size are greater than the optimal prediction set size. The level of this +Intuitively, the goal of the method is to penalize the prediction sets whose size are greater than the optimal prediction set size. The level of this regularization is controlled by the parameter :math:`\lambda`. Despite that RAPS method has relatively small set size, its coverage tends to be higher than the one required (especially for high values of @@ -156,22 +156,22 @@ The implementation of the cross-conformal method follows algorithm 2 of [2]. In short, conformity scores are calculated for all training instances in a cross-validation fashion from their corresponding out-of-fold models. By analogy with the CV+ method for regression, estimating the prediction sets is performed in four main steps: -- We split the training set into *K* disjoint subsets :math:`S_1, S_2, ..., S_K` of equal size. - -- *K* regression functions :math:`\hat{\mu}_{-S_k}` are fitted on the training set with the +- We split the training set into *K* disjoint subsets :math:`S_1, S_2, ..., S_K` of equal size. + +- *K* regression functions :math:`\hat{\mu}_{-S_k}` are fitted on the training set with the corresponding :math:`k^{th}` fold removed. -- The corresponding *out-of-fold* conformity score is computed for each :math:`i^{th}` point +- The corresponding *out-of-fold* conformity score is computed for each :math:`i^{th}` point - Compare the conformity scores of training instances with the scores of each label for each new test point in order to - decide whether or not the label should be included in the prediction set. - For the APS method, the prediction set is constructed as follows (see equation 11 of [3]) : + decide whether or not the label should be included in the prediction set. + For the APS method, the prediction set is constructed as follows (see equation 11 of [3]) : -.. math:: - C_{n, \alpha}(X_{n+1}) = +.. math:: + C_{n, \alpha}(X_{n+1}) = \Big\{ y \in \mathcal{Y} : \sum_{i=1}^n {\rm 1} \Big[ E(X_i, Y_i, U_i; \hat{\pi}^{k(i)}) < E(X_{n+1}, y, U_{n+1}; \hat{\pi}^{k(i)}) \Big] < (1-\alpha)(n+1) \Big\} -where : +where : - :math:`E(X_i, Y_i, U_i; \hat{\pi}^{k(i)})` is the conformity score of training instance :math:`i` @@ -184,18 +184,18 @@ where : .. 
for estimating predictions sets, i.e. a set of possibilities that include the true label .. with a given confidence level. .. The full-conformal methods being computationally intractable, we will focus on the split- -.. and cross-conformal methods. +.. and cross-conformal methods. .. Before describing the methods, let's briefly present the mathematical setting. .. For a classification problem in a standard independent and identically distributed .. (i.i.d) case, our training data :math:`(X, Y) = \{(x_1, y_1), \ldots, (x_n, y_n)\}` -.. has an unknown distribution :math:`P_{X, Y}`. +.. has an unknown distribution :math:`P_{X, Y}`. .. Given some target quantile :math:`\alpha` or associated target coverage level :math:`1-\alpha`, .. we aim at constructing a set of possible labels :math:`\hat{T}_{n, \alpha} \in {1, ..., K}` -.. for a new feature vector :math:`X_{n+1}` such that +.. for a new feature vector :math:`X_{n+1}` such that -.. .. math:: +.. .. math:: .. P \{Y_{n+1} \in \hat{T}_{n, \alpha}(X_{n+1}) \} \geq 1 - \alpha @@ -207,11 +207,11 @@ where : .. and compared with the conformity scores of new test points output by the base model to assess .. whether a label must be included in the prediction set -.. - The split-conformal methodology can be summarized in the scheme below : +.. - The split-conformal methodology can be summarized in the scheme below : .. - The training set is first split into a training set and a calibration set .. - The training set is used for training the model .. - The calibration set is only used for getting distribution of conformity scores output by -.. the model trained only on the training set. +.. the model trained only on the training set. .. 2. The "score" method diff --git a/doc/theoretical_description_conformity_scores.rst b/doc/theoretical_description_conformity_scores.rst index 801a5337..095e0883 100644 --- a/doc/theoretical_description_conformity_scores.rst +++ b/doc/theoretical_description_conformity_scores.rst @@ -9,7 +9,7 @@ Theoretical Description for conformity scores The :class:`mapie.conformity_scores.ConformityScore` class implements various methods to compute conformity scores for regression. We give here a brief theoretical description of the scores included in the module. -Note that it is possible for the user to create any conformal scores that are not +Note that it is possible for the user to create any conformal scores that are not already included in MAPIE by inheriting this class. Before describing the methods, let's briefly present the mathematical setting. diff --git a/doc/theoretical_description_multilabel_classification.rst b/doc/theoretical_description_multilabel_classification.rst index 65c2d642..8d1c7539 100644 --- a/doc/theoretical_description_multilabel_classification.rst +++ b/doc/theoretical_description_multilabel_classification.rst @@ -9,10 +9,10 @@ Theoretical Description Three methods for multi-label uncertainty-quantification have been implemented in MAPIE so far : Risk-Controlling Prediction Sets (RCPS) [1], Conformal Risk Control (CRC) [2] and Learn Then Test (LTT) [3]. -The difference between these methods is the way the conformity scores are computed. +The difference between these methods is the way the conformity scores are computed. For a multi-label classification problem in a standard independent and identically distributed (i.i.d) case, -our training data :math:`(X, Y) = \{(x_1, y_1), \ldots, (x_n, y_n)\}`` has an unknown distribution :math:`P_{X, Y}`. 
+our training data :math:`(X, Y) = \{(x_1, y_1), \ldots, (x_n, y_n)\}`` has an unknown distribution :math:`P_{X, Y}`. For any risk level :math:`\alpha` between 0 and 1, the methods implemented in MAPIE allow the user to construct a prediction set :math:`\hat{C}_{n, \alpha}(X_{n+1})` for a new observation :math:`\left( X_{n+1},Y_{n+1} \right)` with a guarantee @@ -190,7 +190,7 @@ In order to find all the parameters :math:`\lambda` that satisfy the above condi :math:`\{(x_1, y_1), \dots, (x_n, y_n)\}`. - For each :math:`\lambda_j` in a discrete set :math:`\Lambda = \{\lambda_1, \lambda_2,\dots, \lambda_n\}`, we associate the null hypothesis - :math:`\mathcal{H}_j: R(\lambda_j) > \alpha`, as rejecting the hypothesis corresponds to selecting :math:`\lambda_j` as a point where risk the risk + :math:`\mathcal{H}_j: R(\lambda_j) > \alpha`, as rejecting the hypothesis corresponds to selecting :math:`\lambda_j` as a point where risk the risk is controlled. - For each null hypothesis, we compute a valid p-value using a concentration inequality :math:`p_{\lambda_j}`. Here we choose to compute the Hoeffding-Bentkus p-value diff --git a/doc/theoretical_description_regression.rst b/doc/theoretical_description_regression.rst index 45f86b1a..67a5a9ce 100644 --- a/doc/theoretical_description_regression.rst +++ b/doc/theoretical_description_regression.rst @@ -8,9 +8,9 @@ Theoretical Description The :class:`mapie.regression.MapieRegressor` class uses various resampling methods based on the jackknife strategy -recently introduced by Foygel-Barber et al. (2020) [1]. +recently introduced by Foygel-Barber et al. (2020) [1]. They allow the user to estimate robust prediction intervals with any kind of -machine learning model for regression purposes on single-output data. +machine learning model for regression purposes on single-output data. We give here a brief theoretical description of the methods included in the module. Before describing the methods, let's briefly present the mathematical setting. @@ -18,10 +18,10 @@ For a regression problem in a standard independent and identically distributed (i.i.d) case, our training data :math:`(X, Y) = \{(x_1, y_1), \ldots, (x_n, y_n)\}` has an unknown distribution :math:`P_{X, Y}`. We can assume that :math:`Y = \mu(X)+\epsilon` where :math:`\mu` is the model function we want to determine and -:math:`\epsilon_i \sim P_{Y \vert X}` is the noise. +:math:`\epsilon_i \sim P_{Y \vert X}` is the noise. Given some target quantile :math:`\alpha` or associated target coverage level :math:`1-\alpha`, we aim at constructing a prediction interval :math:`\hat{C}_{n, \alpha}` for a new -feature vector :math:`X_{n+1}` such that +feature vector :math:`X_{n+1}` such that .. math:: P \{Y_{n+1} \in \hat{C}_{n, \alpha}(X_{n+1}) \} \geq 1 - \alpha @@ -32,12 +32,12 @@ but other conformity scores are implemented in MAPIE (see :doc:`theoretical_desc 1. The "Naive" method ===================== -The so-called naive method computes the residuals of the training data to estimate the -typical error obtained on a new test data point. -The prediction interval is therefore given by the prediction obtained by the -model trained on the entire training set :math:`\pm` the quantiles of the +The so-called naive method computes the residuals of the training data to estimate the +typical error obtained on a new test data point. 
+The prediction interval is therefore given by the prediction obtained by the +model trained on the entire training set :math:`\pm` the quantiles of the conformity scores of the same training set: - + .. math:: \hat{\mu}(X_{n+1}) \pm ((1-\alpha) \textrm{quantile of} |Y_1-\hat{\mu}(X_1)|, ..., |Y_n-\hat{\mu}(X_n)|) or @@ -46,12 +46,12 @@ or where :math:`\hat{q}_{n, \alpha}^+` is the :math:`(1-\alpha)` quantile of the distribution. -Since this method estimates the conformity scores only on the training set, it tends to be too -optimistic and under-estimates the width of prediction intervals because of a potential overfit. -As a result, the probability that a new point lies in the interval given by the +Since this method estimates the conformity scores only on the training set, it tends to be too +optimistic and under-estimates the width of prediction intervals because of a potential overfit. +As a result, the probability that a new point lies in the interval given by the naive method would be lower than the target level :math:`(1-\alpha)`. -The figure below illustrates the naive method. +The figure below illustrates the naive method. .. image:: images/jackknife_naive.png :width: 200 @@ -60,12 +60,12 @@ The figure below illustrates the naive method. 2. The split method ===================== -The so-called split method computes the residuals of a calibration dataset to estimate the -typical error obtained on a new test data point. -The prediction interval is therefore given by the prediction obtained by the -model trained on the training set :math:`\pm` the quantiles of the +The so-called split method computes the residuals of a calibration dataset to estimate the +typical error obtained on a new test data point. +The prediction interval is therefore given by the prediction obtained by the +model trained on the training set :math:`\pm` the quantiles of the conformity scores of the calibration set: - + .. math:: \hat{\mu}(X_{n+1}) \pm ((1-\alpha) \textrm{quantile of} |Y_1-\hat{\mu}(X_1)|, ..., |Y_n-\hat{\mu}(X_n)|) or @@ -79,13 +79,13 @@ observations to split its original dataset into train and calibration as mention notice that this method is very similar to the naive one, the only difference being that the conformity scores are not computed on the calibration set. Moreover, this method will always give prediction intervals with a constant width. - + 3. The jackknife method ======================= -The *standard* jackknife method is based on the construction of a set of -*leave-one-out* models. +The *standard* jackknife method is based on the construction of a set of +*leave-one-out* models. Estimating the prediction intervals is carried out in three main steps: - For each instance *i = 1, ..., n* of the training set, we fit the regression function @@ -97,12 +97,12 @@ Estimating the prediction intervals is carried out in three main steps: - We fit the regression function :math:`\hat{\mu}` on the entire training set and we compute the prediction interval using the computed leave-one-out conformity scores: - + .. math:: \hat{\mu}(X_{n+1}) \pm ((1-\alpha) \textrm{ quantile of } |Y_1-\hat{\mu}_{-1}(X_1)|, ..., |Y_n-\hat{\mu}_{-n}(X_n)|) The resulting confidence interval can therefore be summarized as follows -.. math:: \hat{C}_{n, \alpha}^{\rm jackknife}(X_{n+1}) = [ \hat{q}_{n, \alpha}^-\{\hat{\mu}(X_{n+1}) - R_i^{\rm LOO} \}, \hat{q}_{n, \alpha}^+\{\hat{\mu}(X_{n+1}) + R_i^{\rm LOO} \}] +.. 
math:: \hat{C}_{n, \alpha}^{\rm jackknife}(X_{n+1}) = [ \hat{q}_{n, \alpha}^-\{\hat{\mu}(X_{n+1}) - R_i^{\rm LOO} \}, \hat{q}_{n, \alpha}^+\{\hat{\mu}(X_{n+1}) + R_i^{\rm LOO} \}] where @@ -110,24 +110,24 @@ where is the *leave-one-out* conformity score. -This method avoids the overfitting problem but can lose its predictive -cover when :math:`\hat{\mu}` becomes unstable, for example when the +This method avoids the overfitting problem but can lose its predictive +cover when :math:`\hat{\mu}` becomes unstable, for example when the sample size is closed to the number of features -(as seen in the "Reproducing the simulations from Foygel-Barber et al. (2020)" example). +(as seen in the "Reproducing the simulations from Foygel-Barber et al. (2020)" example). 4. The jackknife+ method ======================== -Unlike the standard jackknife method which estimates a prediction interval centered -around the prediction of the model trained on the entire dataset, the so-called jackknife+ -method uses each leave-one-out prediction on the new test point to take the variability of the +Unlike the standard jackknife method which estimates a prediction interval centered +around the prediction of the model trained on the entire dataset, the so-called jackknife+ +method uses each leave-one-out prediction on the new test point to take the variability of the regression function into account. The resulting confidence interval can therefore be summarized as follows -.. math:: \hat{C}_{n, \alpha}^{\rm jackknife+}(X_{n+1}) = [ \hat{q}_{n, \alpha}^-\{\hat{\mu}_{-i}(X_{n+1}) - R_i^{\rm LOO} \}, \hat{q}_{n, \alpha}^+\{\hat{\mu}_{-i}(X_{n+1}) + R_i^{\rm LOO} \}] +.. math:: \hat{C}_{n, \alpha}^{\rm jackknife+}(X_{n+1}) = [ \hat{q}_{n, \alpha}^-\{\hat{\mu}_{-i}(X_{n+1}) - R_i^{\rm LOO} \}, \hat{q}_{n, \alpha}^+\{\hat{\mu}_{-i}(X_{n+1}) + R_i^{\rm LOO} \}] -As described in [1], this method garantees a higher stability +As described in [1], this method garantees a higher stability with a coverage level of :math:`1-2\alpha` for a target coverage level of :math:`1-\alpha`, without any *a priori* assumption on the distribution of the data :math:`(X, Y)` nor on the predictive model. @@ -135,17 +135,17 @@ nor on the predictive model. 5. The jackknife-minmax method ============================== -The jackknife-minmax method offers a slightly more conservative alternative since it uses +The jackknife-minmax method offers a slightly more conservative alternative since it uses the minimal and maximal values of the leave-one-out predictions to compute the prediction intervals. The estimated prediction intervals can be defined as follows -.. math:: +.. math:: - \hat{C}_{n, \alpha}^{\rm jackknife-mm}(X_{n+1}) = - [\min \hat{\mu}_{-i}(X_{n+1}) - \hat{q}_{n, \alpha}^+\{R_I^{\rm LOO} \}, - \max \hat{\mu}_{-i}(X_{n+1}) + \hat{q}_{n, \alpha}^+\{R_I^{\rm LOO} \}] + \hat{C}_{n, \alpha}^{\rm jackknife-mm}(X_{n+1}) = + [\min \hat{\mu}_{-i}(X_{n+1}) - \hat{q}_{n, \alpha}^+\{R_I^{\rm LOO} \}, + \max \hat{\mu}_{-i}(X_{n+1}) + \hat{q}_{n, \alpha}^+\{R_I^{\rm LOO} \}] -As justified by [1], this method garantees a coverage level of +As justified by [1], this method garantees a coverage level of :math:`1-\alpha` for a target coverage level of :math:`1-\alpha`. The figure below, adapted from Fig. 1 of [1], illustrates the three jackknife @@ -154,9 +154,9 @@ methods and emphasizes their main differences. .. 
image:: images/jackknife_jackknife.png :width: 800 -However, the jackknife, jackknife+ and jackknife-minmax methods are computationally heavy since -they require to run as many simulations as the number of training points, which is prohibitive -for a typical data science use case. +However, the jackknife, jackknife+ and jackknife-minmax methods are computationally heavy since +they require to run as many simulations as the number of training points, which is prohibitive +for a typical data science use case. 6. The CV+ method @@ -168,24 +168,24 @@ instead of a leave-one-out approach, called the CV+ method. By analogy with the jackknife+ method, estimating the prediction intervals with CV+ is performed in four main steps: -- We split the training set into *K* disjoint subsets :math:`S_1, S_2, ..., S_K` of equal size. - -- *K* regression functions :math:`\hat{\mu}_{-S_k}` are fitted on the training set with the +- We split the training set into *K* disjoint subsets :math:`S_1, S_2, ..., S_K` of equal size. + +- *K* regression functions :math:`\hat{\mu}_{-S_k}` are fitted on the training set with the corresponding :math:`k^{th}` fold removed. -- The corresponding *out-of-fold* conformity score is computed for each :math:`i^{th}` point +- The corresponding *out-of-fold* conformity score is computed for each :math:`i^{th}` point :math:`|Y_i - \hat{\mu}_{-S_{k(i)}}(X_i)|` where *k(i)* is the fold containing *i*. -- Similar to the jackknife+, the regression functions :math:`\hat{\mu}_{-S_{k(i)}}(X_i)` - are used to estimate the prediction intervals. +- Similar to the jackknife+, the regression functions :math:`\hat{\mu}_{-S_{k(i)}}(X_i)` + are used to estimate the prediction intervals. -As for jackknife+, this method garantees a coverage level higher than :math:`1-2\alpha` -for a target coverage level of :math:`1-\alpha`, without any *a priori* assumption on +As for jackknife+, this method garantees a coverage level higher than :math:`1-2\alpha` +for a target coverage level of :math:`1-\alpha`, without any *a priori* assumption on the distribution of the data. -As noted by [1], the jackknife+ can be viewed as a special case of the CV+ -in which :math:`K = n`. -In practice, this method results in slightly wider prediction intervals and is therefore -more conservative, but gives a reasonable compromise for large datasets when the Jacknife+ +As noted by [1], the jackknife+ can be viewed as a special case of the CV+ +in which :math:`K = n`. +In practice, this method results in slightly wider prediction intervals and is therefore +more conservative, but gives a reasonable compromise for large datasets when the Jacknife+ method is unfeasible. @@ -195,7 +195,7 @@ method is unfeasible. By analogy with the standard jackknife and jackknife-minmax methods, the CV and CV-minmax approaches are also included in MAPIE. As for the CV+ method, they rely on out-of-fold regression models that are used to compute the prediction intervals but using the equations given in the jackknife and -jackknife-minmax sections. +jackknife-minmax sections. The figure below, adapted from Fig. 1 of [1], illustrates the three CV @@ -208,40 +208,40 @@ methods and emphasizes their main differences. 8. 
The jackknife+-after-bootstrap method ======================================== -In order to reduce the computational time, and get more robust predictions, -one can adopt a bootstrap approach instead of a leave-one-out approach, called +In order to reduce the computational time, and get more robust predictions, +one can adopt a bootstrap approach instead of a leave-one-out approach, called the jackknife+-after-bootstrap method, offered by Kim and al. [2]. Intuitively, this method uses ensemble methodology to calculate the :math:`i^{\text{th}}` aggregated prediction and residual by only taking subsets in which the :math:`i^{\text{th}}` observation is not used to fit the estimator. -By analogy with the CV+ method, estimating the prediction intervals with +By analogy with the CV+ method, estimating the prediction intervals with jackknife+-after-bootstrap is performed in four main steps: - We resample the training set with replacement (boostrap) :math:`K` times, and thus we get the (non disjoint) bootstraps :math:`B_{1},..., B_{K}` of equal size. -- :math:`K` regressions functions :math:`\hat{\mu}_{B_{k}}` are then fitted on - the bootstraps :math:`(B_{k})`, and the predictions on the complementary sets +- :math:`K` regressions functions :math:`\hat{\mu}_{B_{k}}` are then fitted on + the bootstraps :math:`(B_{k})`, and the predictions on the complementary sets :math:`(B_k^c)` are computed. -- These predictions are aggregated according to a given aggregation function - :math:`{\rm agg}`, typically :math:`{\rm mean}` or :math:`{\rm median}`, and the conformity scores +- These predictions are aggregated according to a given aggregation function + :math:`{\rm agg}`, typically :math:`{\rm mean}` or :math:`{\rm median}`, and the conformity scores :math:`|Y_j - {\rm agg}(\hat{\mu}(B_{K(j)}(X_j)))|` are computed for each :math:`X_j` (with :math:`K(j)` the boostraps not containing :math:`X_j`). - -- The sets :math:`\{\rm agg(\hat{\mu}_{K(j)}(X_i)) + r_j\}` (where :math:`j` indexes + +- The sets :math:`\{\rm agg(\hat{\mu}_{K(j)}(X_i)) + r_j\}` (where :math:`j` indexes the training set) are used to estimate the prediction intervals. -As for jackknife+, this method guarantees a coverage level higher than -:math:`1 - 2\alpha` for a target coverage level of :math:`1 - \alpha`, without -any a priori assumption on the distribution of the data. -In practice, this method results in wider prediction intervals, when the -uncertainty is higher, than :math:`CV+`, because the models' prediction spread +As for jackknife+, this method guarantees a coverage level higher than +:math:`1 - 2\alpha` for a target coverage level of :math:`1 - \alpha`, without +any a priori assumption on the distribution of the data. +In practice, this method results in wider prediction intervals, when the +uncertainty is higher, than :math:`CV+`, because the models' prediction spread is then higher. @@ -253,21 +253,21 @@ heteroscedastic data. It uses quantile regressors with different quantile values to estimate the prediction bounds and the residuals of these methods is used to create the guaranteed coverage value. -.. math:: +.. 
math:: - \hat{C}_{n, \alpha}^{\rm CQR}(X_{n+1}) = + \hat{C}_{n, \alpha}^{\rm CQR}(X_{n+1}) = [\hat{q}_{\alpha_{lo}}(X_{n+1}) - Q_{1-\alpha}(E_{low}, \mathcal{I}_2), \hat{q}_{\alpha_{hi}}(X_{n+1}) + Q_{1-\alpha}(E_{high}, \mathcal{I}_2)] Where :math:`Q_{1-\alpha}(E, \mathcal{I}_2) := (1-\alpha)(1+1/ |\mathcal{I}_2|)`-th empirical quantile of :math:`{E_i : i \in \mathcal{I}_2}` and :math:`\mathcal{I}_2` is the -residuals of the estimator fitted on the calibration set. Note that in the symmetric method, +residuals of the estimator fitted on the calibration set. Note that in the symmetric method, :math:`E_{low}` and :math:`E_{high}` are equal. -As justified by [3], this method offers a theoretical guarantee of the target coverage +As justified by [3], this method offers a theoretical guarantee of the target coverage level :math:`1-\alpha`. -Note that only the split method has been implemented and that it will run three separate +Note that only the split method has been implemented and that it will run three separate regressions when using :class:`mapie.quantile_regression.MapieQuantileRegressor`. @@ -291,14 +291,14 @@ However the confidence intervals are like those of the jackknife method. where :math:`\hat{\mu}_{agg}(X_{n+1})` is the aggregation of the predictions of the LOO estimators (mean or median), and -:math:`R_i^{\rm LOO} = |Y_i - \hat{\mu}_{-i}(X_{i})|` +:math:`R_i^{\rm LOO} = |Y_i - \hat{\mu}_{-i}(X_{i})|` is the residual of the LOO estimator :math:`\hat{\mu}_{-i}` at :math:`X_{i}` [4]. The residuals are no longer considered in absolute values but in relative values and the width of the confidence intervals are minimized, up to a given gap between the quantiles' level, optimizing the parameter :math:`\beta`. -Moreover, the residuals are updated during the prediction, each time new observations +Moreover, the residuals are updated during the prediction, each time new observations are available. So that the deterioration of predictions, or the increase of noise level, can be dynamically taken into account. @@ -313,7 +313,7 @@ hypotheses: .. math:: \frac{1}{T}\sum_1^T(\hat{\mu}_{-t}(x_t) - \mu(x_t))^2 < \delta_T^2 -The coverage level depends on the size of the training set and on +The coverage level depends on the size of the training set and on :math:`(\delta_T)_{T > 0}`. Be careful: the bigger the training set, the better the covering guarantee @@ -333,13 +333,13 @@ Key takeaways - Since the typical coverage levels estimated by jackknife+ follow very closely the target coverage levels, this method should be used when accurate and robust prediction intervals are required. -- For practical applications where :math:`n` is large and/or the computational time of each - *leave-one-out* simulation is high, it is advised to adopt the CV+ method, based on *out-of-fold* - simulations, or the jackknife+-after-bootstrap method, instead. - Indeed, the methods based on the jackknife resampling approach are very cumbersome because they +- For practical applications where :math:`n` is large and/or the computational time of each + *leave-one-out* simulation is high, it is advised to adopt the CV+ method, based on *out-of-fold* + simulations, or the jackknife+-after-bootstrap method, instead. + Indeed, the methods based on the jackknife resampling approach are very cumbersome because they require to run a high number of simulations, equal to the number of training samples :math:`n`. 
-- Although the CV+ method results in prediction intervals that are slightly larger than for the +- Although the CV+ method results in prediction intervals that are slightly larger than for the jackknife+ method, it offers a good compromise between computational time and accurate predictions. - The jackknife+-after-bootstrap method results in the same computational efficiency, and @@ -349,7 +349,7 @@ Key takeaways theoretical and practical coverages due to the larger widths of the prediction intervals. It is therefore advised to use them when conservative estimates are needed. -- The conformalized quantile regression method allows for more adaptiveness on the prediction +- The conformalized quantile regression method allows for more adaptiveness on the prediction intervals which becomes key when faced with heteroscedastic data. - If the "exchangeability hypothesis" is not valid, typically for time series, @@ -379,10 +379,10 @@ References [3] Yaniv Romano, Evan Patterson, Emmanuel J. Candès. "Conformalized Quantile Regression." Advances in neural information processing systems 32 (2019). -[4] Chen Xu and Yao Xie. +[4] Chen Xu and Yao Xie. "Conformal Prediction Interval for Dynamic Time-Series." International Conference on Machine Learning (ICML, 2021). [5] Jing Lei, Max G’Sell, Alessandro Rinaldo, Ryan J Tibshirani, and Larry Wasserman. -"Distribution-free predictive inference for regression". -Journal of the American Statistical Association, 113(523):1094–1111, 2018. \ No newline at end of file +"Distribution-free predictive inference for regression". +Journal of the American Statistical Association, 113(523):1094–1111, 2018. diff --git a/environment.doc.yml b/environment.doc.yml index a38494e9..b1ea21a8 100644 --- a/environment.doc.yml +++ b/environment.doc.yml @@ -10,4 +10,4 @@ dependencies: - typing_extensions=4.0.1 - pandas - matplotlib - - lightgbm \ No newline at end of file + - lightgbm diff --git a/examples/README.rst b/examples/README.rst index 1f8a7b3f..763f351f 100644 --- a/examples/README.rst +++ b/examples/README.rst @@ -1,4 +1,4 @@ .. _general_examples: General examples -================ \ No newline at end of file +================ diff --git a/examples/calibration/1-quickstart/README.rst b/examples/calibration/1-quickstart/README.rst index 278a2ed8..50a9bb56 100644 --- a/examples/calibration/1-quickstart/README.rst +++ b/examples/calibration/1-quickstart/README.rst @@ -3,4 +3,4 @@ 1. Quickstart examples ---------------------- -The following examples present the main functionalities of MAPIE through basic quickstart calibration problems. \ No newline at end of file +The following examples present the main functionalities of MAPIE through basic quickstart calibration problems. diff --git a/examples/calibration/1-quickstart/plot_calibration_hypothesis_testing.py b/examples/calibration/1-quickstart/plot_calibration_hypothesis_testing.py index 212bbdbe..3a3b6e4e 100644 --- a/examples/calibration/1-quickstart/plot_calibration_hypothesis_testing.py +++ b/examples/calibration/1-quickstart/plot_calibration_hypothesis_testing.py @@ -30,11 +30,7 @@ from sklearn.utils import check_random_state from mapie._typing import NDArray -from mapie.metrics import ( - cumulative_differences, - length_scale, - kolmogorov_smirnov_p_value -) +from mapie.metrics import cumulative_differences, length_scale, kolmogorov_smirnov_p_value #################################################################### # 1. 
Create 1-dimensional dataset and scores to test for calibration @@ -50,10 +46,7 @@ def sigmoid(x: NDArray): return y -def generate_y_true_calibrated( - y_prob: NDArray, - random_state: int = 1 -) -> NDArray: +def generate_y_true_calibrated(y_prob: NDArray, random_state: int = 1) -> NDArray: generator = check_random_state(random_state) uniform = generator.uniform(size=len(y_prob)) y_true = (uniform <= y_prob).astype(float) @@ -68,11 +61,7 @@ def generate_y_true_calibrated( # Next we provide two additional miscalibrated scores (on purpose). -y = { - "y_prob": y_prob, - "y_pred_1": sigmoid(1.3*X), - "y_pred_2": sigmoid(0.7*X) -} +y = {"y_prob": y_prob, "y_pred_1": sigmoid(1.3 * X), "y_pred_2": sigmoid(0.7 * X)} #################################################################### # This is how the two miscalibration curves stands next to the @@ -118,16 +107,13 @@ def generate_y_true_calibrated( # First we compute the cumulative differences. -cum_diffs = { - name: cumulative_differences(y_true, y_score) - for name, y_score in y.items() -} +cum_diffs = {name: cumulative_differences(y_true, y_score) for name, y_score in y.items()} #################################################################### # We want to plot is along the proportion of scores taken into account. -k = np.arange(len(y_true))/len(y_true) +k = np.arange(len(y_true)) / len(y_true) #################################################################### # We also want to compare the extension of the curve to that of a typical @@ -140,10 +126,7 @@ def generate_y_true_calibrated( # Finally, we compute the p-value according to Kolmogorov-Smirnov test [2, 3]. -p_values = { - name: kolmogorov_smirnov_p_value(y_true, y_score) - for name, y_score in y.items() -} +p_values = {name: kolmogorov_smirnov_p_value(y_true, y_score) for name, y_score in y.items()} #################################################################### # The graph hereafter shows cumulative differences of each series of scores. @@ -161,8 +144,8 @@ def generate_y_true_calibrated( for name, cum_diff in cum_diffs.items(): plt.plot(k, cum_diff, label=f"name (p-value = {p_values[name]:.5f})") -plt.axhline(y=2*sigma, color="r", linestyle="--") -plt.axhline(y=-2*sigma, color="r", linestyle="--") +plt.axhline(y=2 * sigma, color="r", linestyle="--") +plt.axhline(y=-2 * sigma, color="r", linestyle="--") plt.title("Probability curves") plt.xlabel("Proportion of scores considered") plt.ylabel("Cumulative differences with the ground truth") diff --git a/examples/calibration/2-advanced-analysis/README.rst b/examples/calibration/2-advanced-analysis/README.rst index 84b57c3d..48909ce4 100644 --- a/examples/calibration/2-advanced-analysis/README.rst +++ b/examples/calibration/2-advanced-analysis/README.rst @@ -3,4 +3,4 @@ 2. Advanced analysis -------------------- -The following examples use MAPIE for discussing more complex calibration problems. \ No newline at end of file +The following examples use MAPIE for discussing more complex calibration problems. 
diff --git a/examples/calibration/2-advanced-analysis/plot_asymptotic_convergence_of_p_values.py b/examples/calibration/2-advanced-analysis/plot_asymptotic_convergence_of_p_values.py index aafd220b..1a544bc2 100644 --- a/examples/calibration/2-advanced-analysis/plot_asymptotic_convergence_of_p_values.py +++ b/examples/calibration/2-advanced-analysis/plot_asymptotic_convergence_of_p_values.py @@ -40,11 +40,7 @@ from sklearn.utils import check_random_state from mapie._typing import NDArray -from mapie.metrics import ( - kolmogorov_smirnov_p_value, - kuiper_p_value, - spiegelhalter_p_value -) +from mapie.metrics import kolmogorov_smirnov_p_value, kuiper_p_value, spiegelhalter_p_value ############################################################################## # First we need to generate scores that are perfecty calibrated. To do so, @@ -52,15 +48,13 @@ # and draw random labels 0 or 1 according to these probabilities. -def generate_y_true_calibrated( - y_prob: NDArray, - random_state: int = 1 -) -> NDArray: +def generate_y_true_calibrated(y_prob: NDArray, random_state: int = 1) -> NDArray: generator = check_random_state(random_state) uniform = generator.uniform(size=len(y_prob)) y_true = (uniform <= y_prob).astype(float) return y_true + ############################################################################## # Then, we draw many different calibrated datasets, each with a fixed # dataset size. For each of these datasets, we compute the available p-values @@ -94,17 +88,10 @@ def generate_y_true_calibrated( # and Kolmogorov-Smirnov. plt.hist( - ks_p_values, 100, - cumulative=True, density=True, histtype="step", label="Kolmogorov-Smirnov" -) -plt.hist( - ku_p_values, 100, - cumulative=True, density=True, histtype="step", label="Kuiper" -) -plt.hist( - sp_p_values, 100, - cumulative=True, density=True, histtype="step", label="Spiegelhalter" + ks_p_values, 100, cumulative=True, density=True, histtype="step", label="Kolmogorov-Smirnov" ) +plt.hist(ku_p_values, 100, cumulative=True, density=True, histtype="step", label="Kuiper") +plt.hist(sp_p_values, 100, cumulative=True, density=True, histtype="step", label="Spiegelhalter") plt.plot([0, 1], [0, 1], "--", color="black") plt.title("Distribution of p-values for calibrated datasets") plt.xlabel("p-values") diff --git a/examples/calibration/README.rst b/examples/calibration/README.rst index 0b1e60a3..750bd94b 100644 --- a/examples/calibration/README.rst +++ b/examples/calibration/README.rst @@ -1,4 +1,4 @@ .. _calibration_examples: Calibration examples -======================= \ No newline at end of file +======================= diff --git a/examples/classification/1-quickstart/README.rst b/examples/classification/1-quickstart/README.rst index 1f97208d..0b6f4331 100644 --- a/examples/classification/1-quickstart/README.rst +++ b/examples/classification/1-quickstart/README.rst @@ -3,4 +3,4 @@ 1. Quickstart examples ---------------------- -The following examples present the main functionalities of MAPIE through basic quickstart classification problems. \ No newline at end of file +The following examples present the main functionalities of MAPIE through basic quickstart classification problems. 
diff --git a/examples/classification/1-quickstart/plot_comp_methods_on_2d_dataset.py b/examples/classification/1-quickstart/plot_comp_methods_on_2d_dataset.py index b03e8cb9..5ba309aa 100644 --- a/examples/classification/1-quickstart/plot_comp_methods_on_2d_dataset.py +++ b/examples/classification/1-quickstart/plot_comp_methods_on_2d_dataset.py @@ -55,8 +55,7 @@ from mapie._typing import NDArray from mapie.classification import MapieClassifier -from mapie.metrics import (classification_coverage_score, - classification_mean_width_score) +from mapie.metrics import classification_coverage_score, classification_mean_width_score centers = [(0, 3.5), (-2, 0), (2, 0)] covs = [np.eye(2), np.eye(2) * 2, np.diag([5, 1])] @@ -65,17 +64,12 @@ n_classes = 3 np.random.seed(42) X = np.vstack( - [ - np.random.multivariate_normal(center, cov, n_samples) - for center, cov in zip(centers, covs) - ] + [np.random.multivariate_normal(center, cov, n_samples) for center, cov in zip(centers, covs)] ) y = np.hstack([np.full(n_samples, i) for i in range(n_classes)]) X_train, X_cal, y_train, y_cal = train_test_split(X, y, test_size=0.3) -xx, yy = np.meshgrid( - np.arange(x_min, x_max, step), np.arange(x_min, x_max, step) -) +xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(x_min, x_max, step)) X_test = np.stack([xx.ravel(), yy.ravel()], axis=1) @@ -124,7 +118,9 @@ ) mapie[method].fit(X_cal, y_cal) y_pred_mapie[method], y_ps_mapie[method] = mapie[method].predict( - X_test, alpha=alpha, include_last_label=True, + X_test, + alpha=alpha, + include_last_label=True, ) @@ -180,9 +176,7 @@ def plot_scores( # different values ​​of alpha. -def plot_results( - alphas: List[float], y_pred_mapie: NDArray, y_ps_mapie: NDArray -) -> None: +def plot_results(alphas: List[float], y_pred_mapie: NDArray, y_ps_mapie: NDArray) -> None: tab10 = plt.cm.get_cmap("Purples", 4) colors = { 0: "#1f77b4", @@ -254,16 +248,12 @@ def plot_results( random_state=42, ) mapie[method].fit(X_cal, y_cal) - _, y_ps_mapie[method] = mapie[method].predict( - X, alpha=alpha_, include_last_label="randomized" - ) + _, y_ps_mapie[method] = mapie[method].predict(X, alpha=alpha_, include_last_label="randomized") coverage[method] = [ - classification_coverage_score(y, y_ps_mapie[method][:, :, i]) - for i, _ in enumerate(alpha_) + classification_coverage_score(y, y_ps_mapie[method][:, :, i]) for i, _ in enumerate(alpha_) ] mean_width[method] = [ - classification_mean_width_score(y_ps_mapie[method][:, :, i]) - for i, _ in enumerate(alpha_) + classification_mean_width_score(y_ps_mapie[method][:, :, i]) for i, _ in enumerate(alpha_) ] fig, axs = plt.subplots(1, 3, figsize=(15, 5)) diff --git a/examples/classification/2-advanced-analysis/README.rst b/examples/classification/2-advanced-analysis/README.rst index 5f0ebdb9..e7a1f838 100644 --- a/examples/classification/2-advanced-analysis/README.rst +++ b/examples/classification/2-advanced-analysis/README.rst @@ -3,4 +3,4 @@ 2. Advanced analysis -------------------- -The following examples use MAPIE for discussing more complex MAPIE classification problems. \ No newline at end of file +The following examples use MAPIE for discussing more complex MAPIE classification problems. 
diff --git a/examples/classification/3-scientific-articles/README.rst b/examples/classification/3-scientific-articles/README.rst index 4aa1d9a4..d81f2d70 100644 --- a/examples/classification/3-scientific-articles/README.rst +++ b/examples/classification/3-scientific-articles/README.rst @@ -5,4 +5,4 @@ The following examples reproduce the simulations from the scientific articles that present the methods implemented -in MAPIE for classification settings. \ No newline at end of file +in MAPIE for classification settings. diff --git a/examples/classification/3-scientific-articles/plot_sadinle2019_example.py b/examples/classification/3-scientific-articles/plot_sadinle2019_example.py index 439123cb..ddf20e91 100644 --- a/examples/classification/3-scientific-articles/plot_sadinle2019_example.py +++ b/examples/classification/3-scientific-articles/plot_sadinle2019_example.py @@ -37,18 +37,13 @@ alpha = [0.2, 0.1, 0.05] np.random.seed(42) X_train = np.vstack( - [ - np.random.multivariate_normal(center, cov, n_samples) - for center, cov in zip(centers, covs) - ] + [np.random.multivariate_normal(center, cov, n_samples) for center, cov in zip(centers, covs)] ) y_train = np.hstack([np.full(n_samples, i) for i in range(n_classes)]) # Create test from (x, y) coordinates -xx, yy = np.meshgrid( - np.arange(x_min, x_max, step), np.arange(x_min, x_max, step) -) +xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(x_min, x_max, step)) X_test = np.stack([xx.ravel(), yy.ravel()], axis=1) # Apply MapieClassifier on the dataset to get prediction sets @@ -67,9 +62,7 @@ y_train_col = list(map(colors.get, y_train)) y_train_col = [colors[int(i)] for _, i in enumerate(y_train)] fig, axs = plt.subplots(1, 4, figsize=(20, 4)) -axs[0].scatter( - X_test[:, 0], X_test[:, 1], color=y_pred_col, marker=".", s=10, alpha=0.4 -) +axs[0].scatter(X_test[:, 0], X_test[:, 1], color=y_pred_col, marker=".", s=10, alpha=0.4) axs[0].scatter( X_train[:, 0], X_train[:, 1], diff --git a/examples/classification/4-tutorials/README.rst b/examples/classification/4-tutorials/README.rst index 2724fa0a..5d8d0510 100644 --- a/examples/classification/4-tutorials/README.rst +++ b/examples/classification/4-tutorials/README.rst @@ -3,4 +3,4 @@ 4. Tutorials ------------ -The following examples present pedagogical tutorials explaining how to use MAPIE on different classification taks. \ No newline at end of file +The following examples present pedagogical tutorials explaining how to use MAPIE on different classification taks. diff --git a/examples/classification/4-tutorials/plot_crossconformal.py b/examples/classification/4-tutorials/plot_crossconformal.py index 8200e6c2..24c4bd16 100644 --- a/examples/classification/4-tutorials/plot_crossconformal.py +++ b/examples/classification/4-tutorials/plot_crossconformal.py @@ -37,8 +37,7 @@ from mapie._typing import NDArray from mapie.classification import MapieClassifier -from mapie.metrics import (classification_coverage_score, - classification_mean_width_score) +from mapie.metrics import classification_coverage_score, classification_mean_width_score ############################################################################## # 1. 
Estimating the impact of train/calibration split on the prediction sets @@ -59,23 +58,20 @@ n_cv = 5 np.random.seed(42) -X_train = np.vstack([ - np.random.multivariate_normal(center, cov, n_samples) - for center, cov in zip(centers, covs) -]) +X_train = np.vstack( + [np.random.multivariate_normal(center, cov, n_samples) for center, cov in zip(centers, covs)] +) y_train = np.hstack([np.full(n_samples, i) for i in range(n_classes)]) -X_test_distrib = np.vstack([ - np.random.multivariate_normal(center, cov, 10*n_samples) - for center, cov in zip(centers, covs) -]) -y_test_distrib = np.hstack( - [np.full(10*n_samples, i) for i in range(n_classes)] +X_test_distrib = np.vstack( + [ + np.random.multivariate_normal(center, cov, 10 * n_samples) + for center, cov in zip(centers, covs) + ] ) +y_test_distrib = np.hstack([np.full(10 * n_samples, i) for i in range(n_classes)]) -xx, yy = np.meshgrid( - np.arange(x_min, x_max, step), np.arange(x_min, x_max, step) -) +xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(x_min, x_max, step)) X_test = np.stack([xx.ravel(), yy.ravel()], axis=1) @@ -123,7 +119,10 @@ ) y_preds_[fold], y_ps_mapies_[fold] = y_pred_mapie, y_ps_mapie clfs[method], mapies[method], y_preds[method], y_ps_mapies[method] = ( - clfs_, mapies_, y_preds_, y_ps_mapies_ + clfs_, + mapies_, + y_preds_, + y_ps_mapies_, ) @@ -138,10 +137,7 @@ axs[i].hist(mapie.conformity_scores_) axs[i].axvline(mapie.quantiles_[9], ls="--", color="k") axs[i].set_title(f"split={key}\nquantile={mapie.quantiles_[9]:.3f}") -plt.suptitle( - "Distribution of scores on each calibration fold for the " - f"{methods[0]} method" -) +plt.suptitle("Distribution of scores on each calibration fold for the " f"{methods[0]} method") plt.show() @@ -160,35 +156,30 @@ def plot_results( X_test2: NDArray, y_test2: NDArray, alpha: float, - method: str + method: str, ) -> None: - tab10 = plt.cm.get_cmap('Purples', 4) + tab10 = plt.cm.get_cmap("Purples", 4) fig, axs = plt.subplots(1, len(mapies), figsize=(20, 4)) for i, (_, mapie) in enumerate(mapies.items()): - y_pi_sums = mapie.predict( - X_test, - alpha=alpha, - include_last_label=True - )[1][:, :, 0].sum(axis=1) + y_pi_sums = mapie.predict(X_test, alpha=alpha, include_last_label=True)[1][:, :, 0].sum( + axis=1 + ) axs[i].scatter( X_test[:, 0], X_test[:, 1], c=y_pi_sums, - marker='.', + marker=".", s=10, alpha=1, cmap=tab10, vmin=0, - vmax=3 + vmax=3, ) coverage = classification_coverage_score( y_test2, mapie.predict(X_test2, alpha=alpha)[1][:, :, 0] ) axs[i].set_title(f"coverage = {coverage:.3f}") - plt.suptitle( - "Number of labels in prediction sets " - f"for the {method} method" - ) + plt.suptitle("Number of labels in prediction sets " f"for the {method} method") plt.show() @@ -198,23 +189,9 @@ def plot_results( # prediction set size as function of the ``alpha`` parameter. 
-plot_results( - mapies["lac"], - X_test, - X_test_distrib, - y_test_distrib, - alpha[9], - "lac" -) +plot_results(mapies["lac"], X_test, X_test_distrib, y_test_distrib, alpha[9], "lac") -plot_results( - mapies["aps"], - X_test, - X_test_distrib, - y_test_distrib, - alpha[9], - "aps" -) +plot_results(mapies["aps"], X_test, X_test_distrib, y_test_distrib, alpha[9], "aps") ############################################################################## @@ -227,7 +204,7 @@ def plot_coverage_width( coverages: List[NDArray], widths: List[NDArray], method: str, - comp: str = "split" + comp: str = "split", ) -> None: if comp == "split": legends = [f"Split {i + 1}" for i, _ in enumerate(coverages)] @@ -245,10 +222,7 @@ def plot_coverage_width( for i, width in enumerate(widths): axes[1].plot(1 - alpha, width, label=legends[i]) axes[1].legend() - plt.suptitle( - "Effective coverage and prediction set size " - f"for the {method} method" - ) + plt.suptitle("Effective coverage and prediction set size " f"for the {method} method") plt.show() @@ -256,33 +230,28 @@ def plot_coverage_width( [ [ [ - classification_coverage_score( - y_test_distrib, y_ps[:, :, ia] - ) for ia, _ in enumerate(alpha)] + classification_coverage_score(y_test_distrib, y_ps[:, :, ia]) + for ia, _ in enumerate(alpha) + ] for _, y_ps in y_ps2.items() - ] for _, y_ps2 in y_ps_mapies.items() + ] + for _, y_ps2 in y_ps_mapies.items() ] ) split_widths = np.array( [ [ - [ - classification_mean_width_score(y_ps[:, :, ia]) - for ia, _ in enumerate(alpha) - ] + [classification_mean_width_score(y_ps[:, :, ia]) for ia, _ in enumerate(alpha)] for _, y_ps in y_ps2.items() - ] for _, y_ps2 in y_ps_mapies.items() + ] + for _, y_ps2 in y_ps_mapies.items() ] ) -plot_coverage_width( - alpha, split_coverages[0], split_widths[0], "lac" -) +plot_coverage_width(alpha, split_coverages[0], split_widths[0], "lac") -plot_coverage_width( - alpha, split_coverages[1], split_widths[1], "aps" -) +plot_coverage_width(alpha, split_coverages[1], split_widths[1], "aps") ############################################################################## @@ -319,19 +288,10 @@ def plot_coverage_width( # with the ``agg_scores`` attribute. 
Params = TypedDict( - "Params", - { - "method": str, - "cv": Optional[Union[int, str]], - "random_state": Optional[int] - } + "Params", {"method": str, "cv": Optional[Union[int, str]], "random_state": Optional[int]} ) ParamsPredict = TypedDict( - "ParamsPredict", - { - "include_last_label": Union[bool, str], - "agg_scores": str - } + "ParamsPredict", {"include_last_label": Union[bool, str], "agg_scores": str} ) kf = KFold(n_splits=5, shuffle=True) @@ -339,20 +299,20 @@ def plot_coverage_width( STRATEGIES = { "score_cv_mean": ( Params(method="lac", cv=kf, random_state=42), - ParamsPredict(include_last_label=False, agg_scores="mean") + ParamsPredict(include_last_label=False, agg_scores="mean"), ), "score_cv_crossval": ( Params(method="lac", cv=kf, random_state=42), - ParamsPredict(include_last_label=False, agg_scores="crossval") + ParamsPredict(include_last_label=False, agg_scores="crossval"), ), "cum_score_cv_mean": ( Params(method="aps", cv=kf, random_state=42), - ParamsPredict(include_last_label="randomized", agg_scores="mean") + ParamsPredict(include_last_label="randomized", agg_scores="mean"), ), "cum_score_cv_crossval": ( Params(method="aps", cv=kf, random_state=42), - ParamsPredict(include_last_label='randomized', agg_scores="crossval") - ) + ParamsPredict(include_last_label="randomized", agg_scores="crossval"), + ), } y_ps = {} @@ -360,11 +320,7 @@ def plot_coverage_width( args_init, args_predict = STRATEGIES[strategy] mapie_clf = MapieClassifier(**args_init) mapie_clf.fit(X_train, y_train) - _, y_ps[strategy] = mapie_clf.predict( - X_test_distrib, - alpha=alpha, - **args_predict - ) + _, y_ps[strategy] = mapie_clf.predict(X_test_distrib, alpha=alpha, **args_predict) ############################################################################## @@ -379,17 +335,12 @@ def plot_coverage_width( for strategy, y_ps_ in y_ps.items(): coverages[strategy] = np.array( [ - classification_coverage_score( - y_test_distrib, - y_ps_[:, :, ia] - ) for ia, _ in enumerate(alpha) + classification_coverage_score(y_test_distrib, y_ps_[:, :, ia]) + for ia, _ in enumerate(alpha) ] ) widths[strategy] = np.array( - [ - classification_mean_width_score(y_ps_[:, :, ia]) - for ia, _ in enumerate(alpha) - ] + [classification_mean_width_score(y_ps_[:, :, ia]) for ia, _ in enumerate(alpha)] ) violations[strategy] = np.abs(coverages[strategy] - (1 - alpha)).mean() @@ -403,7 +354,7 @@ def plot_coverage_width( [coverages["score_cv_mean"], coverages["score_cv_crossval"]], [widths["score_cv_mean"], widths["score_cv_crossval"]], "lac", - comp="mean" + comp="mean", ) plot_coverage_width( @@ -411,7 +362,7 @@ def plot_coverage_width( [coverages["cum_score_cv_mean"], coverages["cum_score_cv_mean"]], [widths["cum_score_cv_crossval"], widths["cum_score_cv_crossval"]], "aps", - comp="mean" + comp="mean", ) @@ -425,29 +376,16 @@ def plot_coverage_width( # impression by comparing the violation of the effective coverage from the # target coverage between the cross-conformal and split-conformal methods. 
-violations_df = pd.DataFrame( - index=["lac", "aps"], - columns=["cv_mean", "cv_crossval", "splits"] -) +violations_df = pd.DataFrame(index=["lac", "aps"], columns=["cv_mean", "cv_crossval", "splits"]) violations_df.loc["lac", "cv_mean"] = violations["score_cv_mean"] violations_df.loc["lac", "cv_crossval"] = violations["score_cv_crossval"] violations_df.loc["lac", "splits"] = np.stack( - [ - np.abs(cov - (1 - alpha)).mean() - for cov in split_coverages[0] - ] + [np.abs(cov - (1 - alpha)).mean() for cov in split_coverages[0]] ).mean() -violations_df.loc["aps", "cv_mean"] = ( - violations["cum_score_cv_mean"] -) -violations_df.loc["aps", "cv_crossval"] = ( - violations["cum_score_cv_crossval"] -) +violations_df.loc["aps", "cv_mean"] = violations["cum_score_cv_mean"] +violations_df.loc["aps", "cv_crossval"] = violations["cum_score_cv_crossval"] violations_df.loc["aps", "splits"] = np.stack( - [ - np.abs(cov - (1 - alpha)).mean() - for cov in split_coverages[1] - ] + [np.abs(cov - (1 - alpha)).mean() for cov in split_coverages[1]] ).mean() print(violations_df) diff --git a/examples/classification/4-tutorials/plot_main-tutorial-binary-classification.py b/examples/classification/4-tutorials/plot_main-tutorial-binary-classification.py index 93ea8137..aeffb5ba 100644 --- a/examples/classification/4-tutorials/plot_main-tutorial-binary-classification.py +++ b/examples/classification/4-tutorials/plot_main-tutorial-binary-classification.py @@ -35,8 +35,7 @@ from mapie._typing import NDArray from mapie.classification import MapieClassifier from sklearn.calibration import CalibratedClassifierCV -from mapie.metrics import (classification_coverage_score, - classification_mean_width_score) +from mapie.metrics import classification_coverage_score, classification_mean_width_score ############################################################################## @@ -79,19 +78,14 @@ n_classes = 2 np.random.seed(42) X = np.vstack( - [ - np.random.multivariate_normal(center, cov, n_samples) - for center, cov in zip(centers, covs) - ] + [np.random.multivariate_normal(center, cov, n_samples) for center, cov in zip(centers, covs)] ) y = np.hstack([np.full(n_samples, i) for i in range(n_classes)]) X, X_val, y, y_val = train_test_split(X, y, test_size=0.5) X_train, X_cal, y_train, y_cal = train_test_split(X, y, test_size=0.3) X_c1, X_c2, y_c1, y_c2 = train_test_split(X_cal, y_cal, test_size=0.5) -xx, yy = np.meshgrid( - np.arange(x_min, x_max, step), np.arange(x_min, x_max, step) -) +xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(x_min, x_max, step)) X_test = np.stack([xx.ravel(), yy.ravel()], axis=1) @@ -132,19 +126,16 @@ y_pred_proba = clf.predict_proba(X_test) y_pred_proba_max = np.max(y_pred_proba, axis=1) -calib = CalibratedClassifierCV( - estimator=clf, method='sigmoid', cv='prefit' -) +calib = CalibratedClassifierCV(estimator=clf, method="sigmoid", cv="prefit") calib.fit(X_c1, y_c1) -mapie_clf = MapieClassifier( - estimator=calib, method='lac', cv='prefit', random_state=42 -) +mapie_clf = MapieClassifier(estimator=calib, method="lac", cv="prefit", random_state=42) mapie_clf.fit(X_c2, y_c2) alpha = [0.2, 0.1, 0.05] y_pred_mapie, y_ps_mapie = mapie_clf.predict( - X_test, alpha=alpha, + X_test, + alpha=alpha, ) @@ -190,7 +181,7 @@ def plot_scores( fig, axs = plt.subplots(1, 1, figsize=(10, 5)) conformity_scores = mapie_clf.conformity_scores_ quantiles = mapie_clf.quantiles_ -plot_scores(alpha, conformity_scores, quantiles, 'lac', axs) +plot_scores(alpha, conformity_scores, quantiles, "lac", axs) 
plt.show() @@ -198,6 +189,7 @@ def plot_scores( # We will now compare the differences between the prediction sets of the # different values ​​of alpha. + def plot_prediction_decision(y_pred_mapie: NDArray, ax) -> None: y_pred_col = list(map(colors.get, y_pred_mapie)) ax.scatter( @@ -245,14 +237,12 @@ def plot_prediction_set(y_ps: NDArray, alpha_: float, ax) -> None: plt.colorbar(num_labels, ax=ax) -def plot_results( - alphas: List[float], y_pred_mapie: NDArray, y_ps_mapie: NDArray -) -> None: +def plot_results(alphas: List[float], y_pred_mapie: NDArray, y_ps_mapie: NDArray) -> None: _, [[ax1, ax2], [ax3, ax4]] = plt.subplots(2, 2, figsize=(10, 10)) axs = {0: ax1, 1: ax2, 2: ax3, 3: ax4} plot_prediction_decision(y_pred_mapie, axs[0]) for i, alpha_ in enumerate(alphas): - plot_prediction_set(y_ps_mapie[:, :, i], alpha_, axs[i+1]) + plot_prediction_set(y_ps_mapie[:, :, i], alpha_, axs[i + 1]) plt.show() @@ -286,27 +276,17 @@ def plot_results( alpha_ = np.arange(0.02, 0.98, 0.02) -calib = CalibratedClassifierCV( - estimator=clf, method='sigmoid', cv='prefit' -) +calib = CalibratedClassifierCV(estimator=clf, method="sigmoid", cv="prefit") calib.fit(X_c1, y_c1) -mapie_clf = MapieClassifier( - estimator=calib, method='lac', cv='prefit', random_state=42 -) +mapie_clf = MapieClassifier(estimator=calib, method="lac", cv="prefit", random_state=42) mapie_clf.fit(X_c2, y_c2) -_, y_ps_mapie = mapie_clf.predict( - X, alpha=alpha_ -) +_, y_ps_mapie = mapie_clf.predict(X, alpha=alpha_) -coverage = np.array([ - classification_coverage_score(y, y_ps_mapie[:, :, i]) - for i, _ in enumerate(alpha_) -]) -mean_width = [ - classification_mean_width_score(y_ps_mapie[:, :, i]) - for i, _ in enumerate(alpha_) -] +coverage = np.array( + [classification_coverage_score(y, y_ps_mapie[:, :, i]) for i, _ in enumerate(alpha_)] +) +mean_width = [classification_mean_width_score(y_ps_mapie[:, :, i]) for i, _ in enumerate(alpha_)] def plot_coverages_widths(alpha, coverage, width, method): @@ -327,7 +307,7 @@ def plot_coverages_widths(alpha, coverage, width, method): plt.show() -plot_coverages_widths(alpha_, coverage, mean_width, 'lac') +plot_coverages_widths(alpha_, coverage, mean_width, "lac") ############################################################################## @@ -336,29 +316,21 @@ def plot_coverages_widths(alpha, coverage, width, method): alpha_ = np.arange(0.02, 0.16, 0.01) -calib = CalibratedClassifierCV( - estimator=clf, method='sigmoid', cv='prefit' -) +calib = CalibratedClassifierCV(estimator=clf, method="sigmoid", cv="prefit") calib.fit(X_c1, y_c1) -mapie_clf = MapieClassifier( - estimator=calib, method='lac', cv='prefit', random_state=42 -) +mapie_clf = MapieClassifier(estimator=calib, method="lac", cv="prefit", random_state=42) mapie_clf.fit(X_c2, y_c2) -_, y_ps_mapie = mapie_clf.predict( - X, alpha=alpha_ -) +_, y_ps_mapie = mapie_clf.predict(X, alpha=alpha_) -non_empty = np.mean( - np.any(mapie_clf.predict(X_test, alpha=alpha_)[1], axis=1), axis=0 -) +non_empty = np.mean(np.any(mapie_clf.predict(X_test, alpha=alpha_)[1], axis=1), axis=0) idx = np.argwhere(non_empty < 1)[0, 0] _, axs = plt.subplots(1, 3, figsize=(15, 5)) plot_prediction_decision(y_pred_mapie, axs[0]) -_, y_ps = mapie_clf.predict(X_test, alpha=alpha_[idx-1]) -plot_prediction_set(y_ps[:, :, 0], np.round(alpha_[idx-1], 3), axs[1]) -_, y_ps = mapie_clf.predict(X_test, alpha=alpha_[idx+1]) -plot_prediction_set(y_ps[:, :, 0], np.round(alpha_[idx+1], 3), axs[2]) +_, y_ps = mapie_clf.predict(X_test, alpha=alpha_[idx - 1]) 
+plot_prediction_set(y_ps[:, :, 0], np.round(alpha_[idx - 1], 3), axs[1]) +_, y_ps = mapie_clf.predict(X_test, alpha=alpha_[idx + 1]) +plot_prediction_set(y_ps[:, :, 0], np.round(alpha_[idx + 1], 3), axs[2]) plt.show() diff --git a/examples/classification/4-tutorials/plot_main-tutorial-classification.py b/examples/classification/4-tutorials/plot_main-tutorial-classification.py index a7905cfe..645fc27a 100644 --- a/examples/classification/4-tutorials/plot_main-tutorial-classification.py +++ b/examples/classification/4-tutorials/plot_main-tutorial-classification.py @@ -23,8 +23,7 @@ from sklearn.naive_bayes import GaussianNB from mapie.classification import MapieClassifier -from mapie.metrics import (classification_coverage_score, - classification_mean_width_score) +from mapie.metrics import classification_coverage_score, classification_mean_width_score ############################################################################## # 1. Conformal Prediction method using the softmax score of the true label @@ -60,42 +59,28 @@ # label. centers = [(0, 3.5), (-2, 0), (2, 0)] -covs = [np.eye(2), np.eye(2)*2, np.diag([5, 1])] +covs = [np.eye(2), np.eye(2) * 2, np.diag([5, 1])] x_min, x_max, y_min, y_max, step = -6, 8, -6, 8, 0.1 n_samples = 1000 n_classes = 3 np.random.seed(42) -X = np.vstack([ - np.random.multivariate_normal(center, cov, n_samples) - for center, cov in zip(centers, covs) -]) -y = np.hstack([np.full(n_samples, i) for i in range(n_classes)]) -X_train_cal, X_test, y_train_cal, y_test = train_test_split( - X, y, test_size=0.2 -) -X_train, X_cal, y_train, y_cal = train_test_split( - X_train_cal, y_train_cal, test_size=0.25 +X = np.vstack( + [np.random.multivariate_normal(center, cov, n_samples) for center, cov in zip(centers, covs)] ) +y = np.hstack([np.full(n_samples, i) for i in range(n_classes)]) +X_train_cal, X_test, y_train_cal, y_test = train_test_split(X, y, test_size=0.2) +X_train, X_cal, y_train, y_cal = train_test_split(X_train_cal, y_train_cal, test_size=0.25) -xx, yy = np.meshgrid( - np.arange(x_min, x_max, step), np.arange(x_min, x_max, step) -) +xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(x_min, x_max, step)) X_test_mesh = np.stack([xx.ravel(), yy.ravel()], axis=1) ############################################################################## # Let’s see our training data. 
-colors = {0: "#1f77b4", 1: "#ff7f0e", 2: "#2ca02c", 3: "#d62728"} +colors = {0: "#1f77b4", 1: "#ff7f0e", 2: "#2ca02c", 3: "#d62728"} y_train_col = list(map(colors.get, y_train)) fig = plt.figure() -plt.scatter( - X_train[:, 0], - X_train[:, 1], - color=y_train_col, - marker='o', - s=10, - edgecolor='k' -) +plt.scatter(X_train[:, 0], X_train[:, 1], color=y_train_col, marker="o", s=10, edgecolor="k") plt.xlabel("X") plt.ylabel("Y") plt.show() @@ -134,7 +119,7 @@ def plot_scores(n, alphas, scores, quantiles): ymax=400, color=colors[i], ls="dashed", - label=f"alpha = {alphas[i]}" + label=f"alpha = {alphas[i]}", ) plt.title("Distribution of scores") plt.legend() @@ -162,35 +147,20 @@ def plot_scores(n, alphas, scores, quantiles): def plot_results(alphas, X, y_pred, y_ps): - tab10 = plt.cm.get_cmap('Purples', 4) - colors = {0: "#1f77b4", 1: "#ff7f0e", 2: "#2ca02c", 3: "#d62728"} + tab10 = plt.cm.get_cmap("Purples", 4) + colors = {0: "#1f77b4", 1: "#ff7f0e", 2: "#2ca02c", 3: "#d62728"} y_pred_col = list(map(colors.get, y_pred)) fig, [[ax1, ax2], [ax3, ax4]] = plt.subplots(2, 2, figsize=(10, 10)) - axs = {0: ax1, 1: ax2, 2: ax3, 3: ax4} - axs[0].scatter( - X[:, 0], - X[:, 1], - color=y_pred_col, - marker='.', - s=10, - alpha=0.4 - ) + axs = {0: ax1, 1: ax2, 2: ax3, 3: ax4} + axs[0].scatter(X[:, 0], X[:, 1], color=y_pred_col, marker=".", s=10, alpha=0.4) axs[0].set_title("Predicted labels") for i, alpha in enumerate(alphas): y_pi_sums = y_ps[:, :, i].sum(axis=1) - num_labels = axs[i+1].scatter( - X[:, 0], - X[:, 1], - c=y_pi_sums, - marker='.', - s=10, - alpha=1, - cmap=tab10, - vmin=0, - vmax=3 + num_labels = axs[i + 1].scatter( + X[:, 0], X[:, 1], c=y_pi_sums, marker=".", s=10, alpha=1, cmap=tab10, vmin=0, vmax=3 ) - plt.colorbar(num_labels, ax=axs[i+1]) - axs[i+1].set_title(f"Number of labels for alpha={alpha}") + plt.colorbar(num_labels, ax=axs[i + 1]) + axs[i + 1].set_title(f"Number of labels for alpha={alpha}") plt.show() @@ -212,12 +182,10 @@ def plot_results(alphas, X, y_pred, y_ps): alpha2 = np.arange(0.02, 0.98, 0.02) _, y_ps_score2 = mapie_score.predict(X_test, alpha=alpha2) coverages_score = [ - classification_coverage_score(y_test, y_ps_score2[:, :, i]) - for i, _ in enumerate(alpha2) + classification_coverage_score(y_test, y_ps_score2[:, :, i]) for i, _ in enumerate(alpha2) ] widths_score = [ - classification_mean_width_score(y_ps_score2[:, :, i]) - for i, _ in enumerate(alpha2) + classification_mean_width_score(y_ps_score2[:, :, i]) for i, _ in enumerate(alpha2) ] @@ -254,14 +222,10 @@ def plot_coverages_widths(alpha, coverage, width, method): # Let's visualize the prediction sets obtained with the APS method on the test # set after fitting MAPIE on the calibration set. -mapie_aps = MapieClassifier( - estimator=clf, cv="prefit", method="aps" -) +mapie_aps = MapieClassifier(estimator=clf, cv="prefit", method="aps") mapie_aps.fit(X_cal, y_cal) alpha = [0.2, 0.1, 0.05] -y_pred_aps, y_ps_aps = mapie_aps.predict( - X_test_mesh, alpha=alpha, include_last_label=True -) +y_pred_aps, y_ps_aps = mapie_aps.predict(X_test_mesh, alpha=alpha, include_last_label=True) plot_results(alpha, X_test_mesh, y_pred_aps, y_ps_aps) @@ -270,17 +234,11 @@ def plot_coverages_widths(alpha, coverage, width, method): # boundaries, but without null prediction sets with respect to the first # "lac" method. 
-_, y_ps_aps2 = mapie_aps.predict( - X_test, alpha=alpha2, include_last_label="randomized" -) +_, y_ps_aps2 = mapie_aps.predict(X_test, alpha=alpha2, include_last_label="randomized") coverages_aps = [ - classification_coverage_score(y_test, y_ps_aps2[:, :, i]) - for i, _ in enumerate(alpha2) -] -widths_aps = [ - classification_mean_width_score(y_ps_aps2[:, :, i]) - for i, _ in enumerate(alpha2) + classification_coverage_score(y_test, y_ps_aps2[:, :, i]) for i, _ in enumerate(alpha2) ] +widths_aps = [classification_mean_width_score(y_ps_aps2[:, :, i]) for i, _ in enumerate(alpha2)] plot_coverages_widths(alpha2, coverages_aps, widths_aps, "lac") diff --git a/examples/classification/README.rst b/examples/classification/README.rst index 828a4bd1..5e4ad4e5 100644 --- a/examples/classification/README.rst +++ b/examples/classification/README.rst @@ -1,4 +1,4 @@ .. _classification_examples: Classification examples -======================= \ No newline at end of file +======================= diff --git a/examples/multilabel_classification/1-quickstart/README.rst b/examples/multilabel_classification/1-quickstart/README.rst index 65aaf636..ee38bdb7 100644 --- a/examples/multilabel_classification/1-quickstart/README.rst +++ b/examples/multilabel_classification/1-quickstart/README.rst @@ -3,4 +3,4 @@ 1. Quickstart examples ---------------------- -The following examples present the main functionalities of MAPIE through basic quickstart regression problems. \ No newline at end of file +The following examples present the main functionalities of MAPIE through basic quickstart regression problems. diff --git a/examples/multilabel_classification/1-quickstart/plot_tutorial_multilabel_classification.py b/examples/multilabel_classification/1-quickstart/plot_tutorial_multilabel_classification.py index af4d572e..771039f8 100644 --- a/examples/multilabel_classification/1-quickstart/plot_tutorial_multilabel_classification.py +++ b/examples/multilabel_classification/1-quickstart/plot_tutorial_multilabel_classification.py @@ -36,35 +36,25 @@ centers = [(0, 10), (-5, 0), (5, 0), (0, 5), (0, 0), (-4, 5), (5, 5)] covs = [ - np.eye(2), np.eye(2), np.eye(2), np.diag([5, 5]), np.diag([3, 1]), - np.array([ - [4, 3], - [3, 4] - ]), - np.array([ - [3, -2], - [-2, 3] - ]), + np.eye(2), + np.eye(2), + np.eye(2), + np.diag([5, 5]), + np.diag([3, 1]), + np.array([[4, 3], [3, 4]]), + np.array([[3, -2], [-2, 3]]), ] x_min, x_max, y_min, y_max, step = -15, 15, -5, 15, 0.1 n_samples = 800 -X = np.vstack([ - np.random.multivariate_normal(center, cov, n_samples) - for center, cov in zip(centers, covs) -]) -classes = [ - [1, 0, 1], [1, 1, 0], [0, 1, 1], [1, 1, 1], - [0, 1, 0], [1, 0, 0], [0, 0, 1] -] +X = np.vstack( + [np.random.multivariate_normal(center, cov, n_samples) for center, cov in zip(centers, covs)] +) +classes = [[1, 0, 1], [1, 1, 0], [0, 1, 1], [1, 1, 1], [0, 1, 0], [1, 0, 0], [0, 0, 1]] y = np.vstack([np.full((n_samples, 3), row) for row in classes]) -X_train_cal, X_test, y_train_cal, y_test = train_test_split( - X, y, test_size=0.2 -) -X_train, X_cal, y_train, y_cal = train_test_split( - X_train_cal, y_train_cal, test_size=0.25 -) +X_train_cal, X_test, y_train_cal, y_test = train_test_split(X, y, test_size=0.2) +X_train, X_cal, y_train, y_cal = train_test_split(X_train_cal, y_train_cal, test_size=0.25) ############################################################################## @@ -77,17 +67,18 @@ (0, 1, 0): {"color": "#d62728", "lac": "0-1-0"}, (1, 1, 0): {"color": "#ffd700", "lac": "1-1-0"}, (1, 0, 0): {"color": 
"#c20078", "lac": "1-0-0"}, - (1, 1, 1): {"color": "#06C2AC", "lac": "1-1-1"} + (1, 1, 1): {"color": "#06C2AC", "lac": "1-1-1"}, } for i in range(7): + color = y[n_samples * i] plt.scatter( - X[n_samples * i:n_samples * (i + 1), 0], - X[n_samples * i:n_samples * (i + 1), 1], - color=colors[tuple(y[n_samples * i])]["color"], - marker='o', + X[n_samples * i : n_samples * (i + 1), 0], + X[n_samples * i : n_samples * (i + 1), 1], + color=colors[(color[0], color[1], color[2])]["color"], + marker="o", s=10, - edgecolor='k' + edgecolor="k", ) plt.legend([c["lac"] for c in colors.values()]) plt.show() @@ -119,7 +110,7 @@ "RCPS - Hoeffding": ("rcps", "hoeffding"), "RCPS - Bernstein": ("rcps", "bernstein"), "RCPS - WSR": ("rcps", "wsr"), - "CRC": ("crc", None) + "CRC": ("crc", None), } clf = MultiOutputClassifier(GaussianNB()).fit(X_train, y_train) @@ -129,18 +120,13 @@ y_test_repeat = np.repeat(y_test[:, :, np.newaxis], len(alpha), 2) for i, (name, (method, bound)) in enumerate(method_params.items()): - mapie = MapieMultiLabelClassifier( - estimator=clf, method=method, metric_control="recall" - ) + mapie = MapieMultiLabelClassifier(estimator=clf, method=method, metric_control="recall") mapie.fit(X_cal, y_cal) - _, y_pss[name] = mapie.predict( - X_test, alpha=alpha, bound=bound, delta=.1 + _, y_pss[name] = mapie.predict(X_test, alpha=alpha, bound=bound, delta=0.1) + recalls[name] = ((y_test_repeat * y_pss[name]).sum(axis=1) / y_test_repeat.sum(axis=1)).mean( + axis=0 ) - recalls[name] = ( - (y_test_repeat * y_pss[name]).sum(axis=1) / - y_test_repeat.sum(axis=1) - ).mean(axis=0) thresholds[name] = mapie.lambdas_star r_hats[name] = mapie.r_hat r_hat_pluss[name] = mapie.r_hat_plus @@ -164,7 +150,7 @@ vars_y = [recalls, thresholds] labels_y = ["Average number of kept labels", "Recall", "Threshold"] -fig, axs = plt.subplots(1, len(vars_y), figsize=(8*len(vars_y), 8)) +fig, axs = plt.subplots(1, len(vars_y), figsize=(8 * len(vars_y), 8)) for i, var in enumerate(vars_y): for name, (method, bound) in method_params.items(): axs[i].plot(1 - alpha, var[name], label=name, linewidth=2) @@ -187,31 +173,19 @@ # * The CRC method gives the best results since it guarantees the coverage # with a larger threshold. -fig, axs = plt.subplots( - 1, - len(method_params), - figsize=(8*len(method_params), 8) -) +fig, axs = plt.subplots(1, len(method_params), figsize=(8 * len(method_params), 8)) for i, (name, (method, bound)) in enumerate(method_params.items()): - axs[i].plot( - mapie.lambdas, - r_hats[name], label=r"$\hat{R}$", linewidth=2 - ) + axs[i].plot(mapie.lambdas, r_hats[name], label=r"$\hat{R}$", linewidth=2) if name != "CRC": - axs[i].plot( - mapie.lambdas, - r_hat_pluss[name], label=r"$\hat{R}^+$", linewidth=2 - ) + axs[i].plot(mapie.lambdas, r_hat_pluss[name], label=r"$\hat{R}^+$", linewidth=2) axs[i].plot([0, 1], [alpha[9], alpha[9]], label=r"$\alpha$") axs[i].plot( - [thresholds[name][9], thresholds[name][9]], [0, 1], - label=r"$\lambda^*" + f" = {thresholds[name][9]}$" + [thresholds[name][9], thresholds[name][9]], + [0, 1], + label=r"$\lambda^*" + f" = {thresholds[name][9]}$", ) axs[i].legend(fontsize=20) - axs[i].set_title( - f"{name} - Recall = {round(recalls[name][9], 2)}", - fontsize=20 - ) + axs[i].set_title(f"{name} - Recall = {round(recalls[name][9], 2)}", fontsize=20) plt.show() ############################################################################## @@ -240,19 +214,11 @@ # doesn't necessarly pass the FWER control! This is what we are going to # explore. 
-mapie_clf = MapieMultiLabelClassifier( - estimator=clf, - method='ltt', - metric_control='precision' -) +mapie_clf = MapieMultiLabelClassifier(estimator=clf, method="ltt", metric_control="precision") mapie_clf.fit(X_cal, y_cal) alpha = 0.1 -_, y_ps = mapie_clf.predict( - X_test, - alpha=alpha, - delta=0.1 -) +_, y_ps = mapie_clf.predict(X_test, alpha=alpha, delta=0.1) valid_index = mapie_clf.valid_index[0] # valid_index is a list of list @@ -275,10 +241,9 @@ plt.figure(figsize=(8, 8)) plt.plot(mapie_clf.lambdas, r_hat, label=r"$\hat{R}_\lambda$") plt.plot([0, 1], [alpha, alpha], label=r"$\alpha$") -plt.axvspan(mini, maxi, facecolor='red', alpha=0.3, label=r"LTT-$\lambda$") +plt.axvspan(mini, maxi, facecolor="red", alpha=0.3, label=r"LTT-$\lambda$") plt.plot( - [lambdas[idx_max], lambdas[idx_max]], [0, 1], - label=r"$\lambda^* =" + f"{lambdas[idx_max]}$" + [lambdas[idx_max], lambdas[idx_max]], [0, 1], label=r"$\lambda^* =" + f"{lambdas[idx_max]}$" ) plt.xlabel(r"Threshold $\lambda$") plt.ylabel(r"Empirical risk: $\hat{R}_\lambda$") diff --git a/examples/multilabel_classification/README.rst b/examples/multilabel_classification/README.rst index 1f8a7b3f..763f351f 100644 --- a/examples/multilabel_classification/README.rst +++ b/examples/multilabel_classification/README.rst @@ -1,4 +1,4 @@ .. _general_examples: General examples -================ \ No newline at end of file +================ diff --git a/examples/regression/1-quickstart/README.rst b/examples/regression/1-quickstart/README.rst index cb770c87..a2fa3985 100644 --- a/examples/regression/1-quickstart/README.rst +++ b/examples/regression/1-quickstart/README.rst @@ -3,4 +3,4 @@ 1. Quickstart examples ---------------------- -The following examples present the main functionalities of MAPIE through basic quickstart regression problems. \ No newline at end of file +The following examples present the main functionalities of MAPIE through basic quickstart regression problems. diff --git a/examples/regression/1-quickstart/plot_compare_conformity_scores.py b/examples/regression/1-quickstart/plot_compare_conformity_scores.py index e4b79c70..663bc04e 100644 --- a/examples/regression/1-quickstart/plot_compare_conformity_scores.py +++ b/examples/regression/1-quickstart/plot_compare_conformity_scores.py @@ -89,9 +89,7 @@ # :class:`~mapie.conformity_scores.AbsoluteConformityScore`. mapie = MapieRegressor(model, random_state=random_state) mapie.fit(X_train, y_train) -y_pred_absconfscore, y_pis_absconfscore = mapie.predict( - X_test, alpha=alpha, ensemble=True -) +y_pred_absconfscore, y_pis_absconfscore = mapie.predict(X_test, alpha=alpha, ensemble=True) coverage_absconfscore = regression_coverage_score( y_test, y_pis_absconfscore[:, 0, 0], y_pis_absconfscore[:, 1, 0] @@ -113,29 +111,21 @@ def get_yerr(y_pred, y_pis): yerr_absconfscore = get_yerr(y_pred_absconfscore, y_pis_absconfscore) -pred_int_width_absconfscore = ( - y_pis_absconfscore[:, 1, 0] - y_pis_absconfscore[:, 0, 0] -) +pred_int_width_absconfscore = y_pis_absconfscore[:, 1, 0] - y_pis_absconfscore[:, 0, 0] ############################################################################## # Then, train the model with # :class:`~mapie.conformity_scores.GammaConformityScore`. 
-mapie = MapieRegressor( - model, conformity_score=GammaConformityScore(), random_state=random_state -) +mapie = MapieRegressor(model, conformity_score=GammaConformityScore(), random_state=random_state) mapie.fit(X_train, y_train) -y_pred_gammaconfscore, y_pis_gammaconfscore = mapie.predict( - X_test, alpha=[alpha], ensemble=True -) +y_pred_gammaconfscore, y_pis_gammaconfscore = mapie.predict(X_test, alpha=[alpha], ensemble=True) coverage_gammaconfscore = regression_coverage_score( y_test, y_pis_gammaconfscore[:, 0, 0], y_pis_gammaconfscore[:, 1, 0] ) yerr_gammaconfscore = get_yerr(y_pred_gammaconfscore, y_pis_gammaconfscore) -pred_int_width_gammaconfscore = ( - y_pis_gammaconfscore[:, 1, 0] - y_pis_gammaconfscore[:, 0, 0] -) +pred_int_width_gammaconfscore = y_pis_gammaconfscore[:, 1, 0] - y_pis_gammaconfscore[:, 0, 0] ############################################################################## @@ -188,8 +178,6 @@ def get_yerr(y_pred, y_pis): axs[1, img_id].set_xlim([xmin, xmax]) axs[1, img_id].set_ylim([ymin, ymax]) -fig.suptitle( - f"Predicted values with the prediction intervals of level {alpha}" -) +fig.suptitle(f"Predicted values with the prediction intervals of level {alpha}") plt.subplots_adjust(wspace=0.3, hspace=0.3) plt.show() diff --git a/examples/regression/1-quickstart/plot_heteroscedastic_1d_data.py b/examples/regression/1-quickstart/plot_heteroscedastic_1d_data.py index 28aedd9d..67355f04 100644 --- a/examples/regression/1-quickstart/plot_heteroscedastic_1d_data.py +++ b/examples/regression/1-quickstart/plot_heteroscedastic_1d_data.py @@ -28,7 +28,7 @@ def f(x: NDArray) -> NDArray: """Polynomial function used to generate one-dimensional data""" - return np.array(5 * x + 5 * x ** 4 - 9 * x ** 2) + return np.array(5 * x + 5 * x**4 - 9 * x**2) def get_heteroscedastic_data( @@ -133,10 +133,13 @@ def plot_1d_data( polyn_model_quant = Pipeline( [ ("poly", PolynomialFeatures(degree=4)), - ("linear", QuantileRegressor( - solver="highs-ds", - alpha=0, - )), + ( + "linear", + QuantileRegressor( + solver="highs-ds", + alpha=0, + ), + ), ] ) @@ -148,24 +151,16 @@ def plot_1d_data( "jackknife_plus_ab": {"method": "plus", "cv": Subsample(n_resamplings=50)}, "conformalized_quantile_regression": {"method": "quantile", "cv": "split"}, } -fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots( - 2, 3, figsize=(3 * 6, 12) -) +fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(2, 3, figsize=(3 * 6, 12)) axs = [ax1, ax2, ax3, ax4, ax5, ax6] for i, (strategy, params) in enumerate(STRATEGIES.items()): if strategy == "conformalized_quantile_regression": - mapie = MapieQuantileRegressor( # type: ignore - polyn_model_quant, - **params - ) + mapie = MapieQuantileRegressor(polyn_model_quant, **params) # type: ignore mapie.fit(X_train.reshape(-1, 1), y_train, random_state=random_state) y_pred, y_pis = mapie.predict(X_test.reshape(-1, 1)) else: mapie = MapieRegressor( # type: ignore - polyn_model, - agg_function="median", - n_jobs=-1, - **params + polyn_model, agg_function="median", n_jobs=-1, **params ) mapie.fit(X_train.reshape(-1, 1), y_train) y_pred, y_pis = mapie.predict( diff --git a/examples/regression/1-quickstart/plot_homoscedastic_1d_data.py b/examples/regression/1-quickstart/plot_homoscedastic_1d_data.py index 9340739a..02e6d4d6 100644 --- a/examples/regression/1-quickstart/plot_homoscedastic_1d_data.py +++ b/examples/regression/1-quickstart/plot_homoscedastic_1d_data.py @@ -26,7 +26,7 @@ def f(x: NDArray) -> NDArray: """Polynomial function used to generate one-dimensional data""" - 
return np.array(5 * x + 5 * x ** 4 - 9 * x ** 2) + return np.array(5 * x + 5 * x**4 - 9 * x**2) def get_homoscedastic_data( @@ -131,10 +131,13 @@ def plot_1d_data( polyn_model_quant = Pipeline( [ ("poly", PolynomialFeatures(degree=4)), - ("linear", QuantileRegressor( - solver="highs-ds", - alpha=0, - )), + ( + "linear", + QuantileRegressor( + solver="highs-ds", + alpha=0, + ), + ), ] ) @@ -146,24 +149,16 @@ def plot_1d_data( "jackknife_plus_ab": {"method": "plus", "cv": Subsample(n_resamplings=50)}, "conformalized_quantile_regression": {"method": "quantile", "cv": "split"}, } -fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots( - 2, 3, figsize=(3 * 6, 12) -) +fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(2, 3, figsize=(3 * 6, 12)) axs = [ax1, ax2, ax3, ax4, ax5, ax6] for i, (strategy, params) in enumerate(STRATEGIES.items()): if strategy == "conformalized_quantile_regression": - mapie = MapieQuantileRegressor( # type: ignore - polyn_model_quant, - **params - ) + mapie = MapieQuantileRegressor(polyn_model_quant, **params) # type: ignore mapie.fit(X_train.reshape(-1, 1), y_train, random_state=random_state) y_pred, y_pis = mapie.predict(X_test.reshape(-1, 1)) else: mapie = MapieRegressor( # type: ignore - polyn_model, - agg_function="median", - n_jobs=-1, - **params + polyn_model, agg_function="median", n_jobs=-1, **params ) mapie.fit(X_train.reshape(-1, 1), y_train) y_pred, y_pis = mapie.predict( diff --git a/examples/regression/1-quickstart/plot_prefit.py b/examples/regression/1-quickstart/plot_prefit.py index d982398b..b6ae2fdd 100644 --- a/examples/regression/1-quickstart/plot_prefit.py +++ b/examples/regression/1-quickstart/plot_prefit.py @@ -55,12 +55,8 @@ def f(x: NDArray) -> NDArray: y = f(X) + np.random.normal(0, sigma, n_samples) # Train/validation/test split -X_train_cal, X_test, y_train_cal, y_test = train_test_split( - X, y, test_size=1 / 10 -) -X_train, X_cal, y_train, y_cal = train_test_split( - X_train_cal, y_train_cal, test_size=1 / 9 -) +X_train_cal, X_test, y_train_cal, y_test = train_test_split(X, y, test_size=1 / 10) +X_train, X_cal, y_train, y_cal = train_test_split(X_train_cal, y_train_cal, test_size=1 / 9) ############################################################################## @@ -85,7 +81,7 @@ def f(x: NDArray) -> NDArray: list_estimators_cqr = [] for alpha_ in [alpha / 2, (1 - (alpha / 2)), 0.5]: estimator_ = LGBMRegressor( - objective='quantile', + objective="quantile", alpha=alpha_, ) estimator_.fit(X_train.reshape(-1, 1), y_train) @@ -115,11 +111,7 @@ def f(x: NDArray) -> NDArray: # Evaluate prediction and coverage level on testing set y_pred_cqr, y_pis_cqr = mapie_cqr.predict(X_test.reshape(-1, 1)) -coverage_cqr = regression_coverage_score( - y_test, - y_pis_cqr[:, 0, 0], - y_pis_cqr[:, 1, 0] -) +coverage_cqr = regression_coverage_score(y_test, y_pis_cqr[:, 0, 0], y_pis_cqr[:, 1, 0]) ############################################################################## @@ -137,40 +129,28 @@ def f(x: NDArray) -> NDArray: order = np.argsort(X_test) plt.figure(figsize=(8, 8)) -plt.plot( - X_test[order], - y_pred[order], - label="Predictions MLP", - color="green" -) +plt.plot(X_test[order], y_pred[order], label="Predictions MLP", color="green") plt.fill_between( X_test[order], y_pis[:, 0, 0][order], y_pis[:, 1, 0][order], alpha=0.4, label="prediction intervals MP", - color="green" -) -plt.plot( - X_test[order], - y_pred_cqr[order], - label="Predictions LGBM", - color="blue" + color="green", ) +plt.plot(X_test[order], y_pred_cqr[order], label="Predictions 
LGBM", color="blue") plt.fill_between( X_test[order], y_pis_cqr[:, 0, 0][order], y_pis_cqr[:, 1, 0][order], alpha=0.4, label="prediction intervals MQP", - color="blue" + color="blue", ) plt.title( f"Target and effective coverages for:\n " - f"MLP with MapieRegressor alpha={alpha}: " - + f"({1 - alpha:.3f}, {coverage:.3f})\n" - f"LGBM with MapieQuantileRegressor alpha={alpha}: " - + f"({1 - alpha:.3f}, {coverage_cqr:.3f})" + f"MLP with MapieRegressor alpha={alpha}: " + f"({1 - alpha:.3f}, {coverage:.3f})\n" + f"LGBM with MapieQuantileRegressor alpha={alpha}: " + f"({1 - alpha:.3f}, {coverage_cqr:.3f})" ) plt.scatter(X_test, y_test, color="red", alpha=0.7, label="testing", s=2) plt.plot( @@ -193,11 +173,5 @@ def f(x: NDArray) -> NDArray: ) plt.xlabel("x") plt.ylabel("y") -plt.legend( - loc='upper center', - bbox_to_anchor=(0.5, -0.05), - fancybox=True, - shadow=True, - ncol=3 -) +plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.05), fancybox=True, shadow=True, ncol=3) plt.show() diff --git a/examples/regression/1-quickstart/plot_timeseries_example.py b/examples/regression/1-quickstart/plot_timeseries_example.py index f1027fe3..f749af2d 100644 --- a/examples/regression/1-quickstart/plot_timeseries_example.py +++ b/examples/regression/1-quickstart/plot_timeseries_example.py @@ -35,14 +35,11 @@ from sklearn.ensemble import RandomForestRegressor from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit -from mapie.metrics import (regression_coverage_score, - regression_mean_width_score) +from mapie.metrics import regression_coverage_score, regression_mean_width_score from mapie.regression import MapieRegressor # Load input data and feature engineering -demand_df = pd.read_csv( - "../../data/demand_temperature.csv", parse_dates=True, index_col=0 -) +demand_df = pd.read_csv("../../data/demand_temperature.csv", parse_dates=True, index_col=0) demand_df["Date"] = pd.to_datetime(demand_df.index) demand_df["Weekofyear"] = demand_df.Date.dt.isocalendar().week.astype("int64") demand_df["Weekday"] = demand_df.Date.dt.isocalendar().day.astype("int64") @@ -82,19 +79,14 @@ # time, but a nested CV approach is preferred. # See the dedicated example in the gallery for more information. 
alpha = 0.1 -mapie = MapieRegressor( - best_est, method="plus", cv=n_splits, agg_function="median", n_jobs=-1 -) +mapie = MapieRegressor(best_est, method="plus", cv=n_splits, agg_function="median", n_jobs=-1) mapie.fit(X_train, y_train) y_pred, y_pis = mapie.predict(X_test, alpha=alpha) coverage = regression_coverage_score(y_test, y_pis[:, 0, 0], y_pis[:, 1, 0]) width = regression_mean_width_score(y_pis[:, 0, 0], y_pis[:, 1, 0]) # Print results -print( - "Coverage and prediction interval width mean for CV+: " - f"{coverage:.3f}, {width:.3f}" -) +print("Coverage and prediction interval width mean for CV+: " f"{coverage:.3f}, {width:.3f}") # Plot estimated prediction intervals on test set fig = plt.figure(figsize=(15, 5)) diff --git a/examples/regression/1-quickstart/plot_toy_model.py b/examples/regression/1-quickstart/plot_toy_model.py index 5cf8c9bb..888ad678 100644 --- a/examples/regression/1-quickstart/plot_toy_model.py +++ b/examples/regression/1-quickstart/plot_toy_model.py @@ -22,8 +22,7 @@ y_pred, y_pis = mapie.predict(X, alpha=alpha) coverage_scores = [ - regression_coverage_score(y, y_pis[:, 0, i], y_pis[:, 1, i]) - for i, _ in enumerate(alpha) + regression_coverage_score(y, y_pis[:, 0, i], y_pis[:, 1, i]) for i, _ in enumerate(alpha) ] plt.xlabel("x") diff --git a/examples/regression/2-advanced-analysis/README.rst b/examples/regression/2-advanced-analysis/README.rst index 9e0516c8..0b499eb9 100644 --- a/examples/regression/2-advanced-analysis/README.rst +++ b/examples/regression/2-advanced-analysis/README.rst @@ -3,4 +3,4 @@ 2. Advanced analysis -------------------- -The following examples use MAPIE for discussing more complex MAPIE problems. \ No newline at end of file +The following examples use MAPIE for discussing more complex MAPIE problems. diff --git a/examples/regression/2-advanced-analysis/plot_both_uncertainties.py b/examples/regression/2-advanced-analysis/plot_both_uncertainties.py index bd3a951a..102710b9 100644 --- a/examples/regression/2-advanced-analysis/plot_both_uncertainties.py +++ b/examples/regression/2-advanced-analysis/plot_both_uncertainties.py @@ -91,10 +91,13 @@ def get_1d_data_with_normal_distrib( polyn_model_quant = Pipeline( [ ("poly", PolynomialFeatures(degree=degree_polyn)), - ("linear", QuantileRegressor( - alpha=0, - solver="highs", # highs-ds does not give good results - )), + ( + "linear", + QuantileRegressor( + alpha=0, + solver="highs", # highs-ds does not give good results + ), + ), ] ) @@ -109,10 +112,7 @@ def get_1d_data_with_normal_distrib( y_pred, y_pis = {}, {} for strategy, params in STRATEGIES.items(): if strategy == "conformalized_quantile_regression": - mapie = MapieQuantileRegressor( # type: ignore - polyn_model_quant, - **params - ) + mapie = MapieQuantileRegressor(polyn_model_quant, **params) # type: ignore mapie.fit(X_train, y_train, random_state=random_state) y_pred[strategy], y_pis[strategy] = mapie.predict(X_test) else: diff --git a/examples/regression/2-advanced-analysis/plot_conditional_coverage.py b/examples/regression/2-advanced-analysis/plot_conditional_coverage.py index 1f1edd41..ea5e2801 100644 --- a/examples/regression/2-advanced-analysis/plot_conditional_coverage.py +++ b/examples/regression/2-advanced-analysis/plot_conditional_coverage.py @@ -13,7 +13,7 @@ :func:`~mapie.metrics.regression_ssc_score` and :func:`~mapie.metrics.hsic`. 
""" import warnings -from typing import Tuple, Union +from typing import Dict, Tuple, Union import pandas as pd import numpy as np @@ -23,11 +23,8 @@ from mapie._typing import NDArray from mapie.regression import MapieQuantileRegressor, MapieRegressor -from mapie.conformity_scores import (GammaConformityScore, - ResidualNormalisedScore) -from mapie.metrics import (regression_coverage_score_v2, - regression_ssc_score, - hsic, regression_ssc) +from mapie.conformity_scores import GammaConformityScore, ResidualNormalisedScore +from mapie.metrics import regression_coverage_score_v2, regression_ssc_score, hsic, regression_ssc from mapie.subsample import Subsample warnings.filterwarnings("ignore") @@ -84,20 +81,15 @@ def sin_with_controlled_noise( # Data generation min_x, max_x, n_samples = 0, 10, 3000 X_train, y_train = sin_with_controlled_noise(min_x, max_x, n_samples) -X_test, y_test = sin_with_controlled_noise(min_x, max_x, - int(n_samples * split_size)) +X_test, y_test = sin_with_controlled_noise(min_x, max_x, int(n_samples * split_size)) # Definition of our base models model = LGBMRegressor(random_state=random_state, alpha=0.5) -model_quant = LGBMRegressor( - objective="quantile", - alpha=0.5, - random_state=random_state -) +model_quant = LGBMRegressor(objective="quantile", alpha=0.5, random_state=random_state) # Definition of the experimental set up -STRATEGIES = { +STRATEGIES: Dict[str, dict] = { "CV+": { "method": "plus", "cv": 10, @@ -105,21 +97,17 @@ def sin_with_controlled_noise( "JK+ab_Gamma": { "method": "plus", "cv": Subsample(n_resamplings=100), - "conformity_score": GammaConformityScore() + "conformity_score": GammaConformityScore(), }, "ResidualNormalised": { "cv": "split", "conformity_score": ResidualNormalisedScore( - residual_estimator=LGBMRegressor( - alpha=0.5, - random_state=random_state), + residual_estimator=LGBMRegressor(alpha=0.5, random_state=random_state), split_size=0.7, - random_state=random_state - ) - }, - "CQR": { - "method": "quantile", "cv": "split", "alpha": alpha + random_state=random_state, + ), }, + "CQR": {"method": "quantile", "cv": "split", "alpha": alpha}, } y_pred, intervals, coverage, cond_coverage, coef_corr = {}, {}, {}, {}, {} @@ -127,44 +115,30 @@ def sin_with_controlled_noise( for strategy, params in STRATEGIES.items(): # computing predictions if strategy == "CQR": - mapie = MapieQuantileRegressor( - model_quant, - **params - ) + mapie = MapieQuantileRegressor(model_quant, **params) mapie.fit(X_train, y_train, random_state=random_state) y_pred[strategy], intervals[strategy] = mapie.predict(X_test) else: mapie = MapieRegressor(model, **params, random_state=random_state) mapie.fit(X_train, y_train) - y_pred[strategy], intervals[strategy] = mapie.predict( - X_test, alpha=alpha - ) + y_pred[strategy], intervals[strategy] = mapie.predict(X_test, alpha=alpha) # computing metrics - coverage[strategy] = regression_coverage_score_v2( - y_test, intervals[strategy] - ) - cond_coverage[strategy] = regression_ssc_score( - y_test, intervals[strategy], num_bins=num_bins - ) + coverage[strategy] = regression_coverage_score_v2(y_test, intervals[strategy]) + cond_coverage[strategy] = regression_ssc_score(y_test, intervals[strategy], num_bins=num_bins) coef_corr[strategy] = hsic(y_test, intervals[strategy]) # Visualisation of the estimated conditional coverage estimated_cond_cov = pd.DataFrame( - columns=["global coverage", "max coverage violation", "hsic"], - index=STRATEGIES.keys()) + columns=["global coverage", "max coverage violation", "hsic"], 
index=STRATEGIES.keys() +) for m, cov, ssc, coef in zip( - STRATEGIES.keys(), - coverage.values(), - cond_coverage.values(), - coef_corr.values() + STRATEGIES.keys(), coverage.values(), cond_coverage.values(), coef_corr.values() ): - estimated_cond_cov.loc[m] = [ - round(cov[0], 2), round(ssc[0], 2), round(coef[0], 2) - ] + estimated_cond_cov.loc[m] = [round(cov[0], 2), round(ssc[0], 2), round(coef[0], 2)] -with pd.option_context('display.max_rows', None, 'display.max_columns', None): +with pd.option_context("display.max_rows", None, "display.max_columns", None): print(estimated_cond_cov) ############################################################################## @@ -227,10 +201,7 @@ def plot_intervals(X, y, y_pred, intervals, title="", ax=None): # data ax.scatter(X.ravel(), y, color="#1f77b4", alpha=0.3, label="data") # predictions - ax.scatter( - X.ravel(), y_pred, - color="#ff7f0e", marker="+", label="predictions", alpha=0.5 - ) + ax.scatter(X.ravel(), y_pred, color="#ff7f0e", marker="+", label="predictions", alpha=0.5) # intervals for i in range(intervals.shape[-1]): ax.fill_between( @@ -238,7 +209,7 @@ def plot_intervals(X, y, y_pred, intervals, title="", ax=None): intervals[:, 0, i][order], intervals[:, 1, i][order], color="#ff7f0e", - alpha=0.3 + alpha=0.3, ) ax.set_xlabel("x") @@ -269,22 +240,20 @@ def plot_coverage_by_width(y, intervals, num_bins, alpha, title="", ax=None): if ax is None: fig, ax = plt.subplots(figsize=(6, 5)) - ax.bar( - np.arange(num_bins), - regression_ssc(y, intervals, num_bins=num_bins)[0] - ) - ax.axhline(y=1 - alpha, color='r', linestyle='-') + ax.bar(np.arange(num_bins), regression_ssc(y, intervals, num_bins=num_bins)[0]) + ax.axhline(y=1 - alpha, color="r", linestyle="-") ax.set_title(title) ax.set_xlabel("intervals grouped by size") ax.set_ylabel("coverage") - ax.tick_params( - axis='x', which='both', bottom=False, top=False, labelbottom=False - ) + ax.tick_params(axis="x", which="both", bottom=False, top=False, labelbottom=False) -max_width = np.max([ - np.abs(intervals[strategy][:, 0, 0] - intervals[strategy][:, 1, 0]) - for strategy in STRATEGIES.keys()]) +max_width = np.max( + [ + np.abs(intervals[strategy][:, 0, 0] - intervals[strategy][:, 1, 0]) + for strategy in STRATEGIES.keys() + ] +) fig_distr, axs_distr = plt.subplots(nrows=2, ncols=2, figsize=(12, 10)) fig_viz, axs_viz = plt.subplots(nrows=2, ncols=2, figsize=(12, 10)) @@ -294,17 +263,14 @@ def plot_coverage_by_width(y, intervals, num_bins, alpha, title="", ax=None): axs_viz.flat, axs_hist.flat, axs_distr.flat, STRATEGIES.keys() ): plot_intervals( - X_test, y_test, y_pred[strategy], intervals[strategy], - title=strategy, ax=ax_viz + X_test, y_test, y_pred[strategy], intervals[strategy], title=strategy, ax=ax_viz ) plot_coverage_by_width( - y_test, intervals[strategy], - num_bins=num_bins, alpha=alpha, title=strategy, ax=ax_hist + y_test, intervals[strategy], num_bins=num_bins, alpha=alpha, title=strategy, ax=ax_hist ) ax_distr.hist( - np.abs(intervals[strategy][:, 0, 0] - intervals[strategy][:, 1, 0]), - bins=num_bins + np.abs(intervals[strategy][:, 0, 0] - intervals[strategy][:, 1, 0]), bins=num_bins ) ax_distr.set_xlabel("Interval width") ax_distr.set_ylabel("Occurences") diff --git a/examples/regression/2-advanced-analysis/plot_conformal_predictive_distribution.py b/examples/regression/2-advanced-analysis/plot_conformal_predictive_distribution.py index 293404ca..26b4121a 100644 --- a/examples/regression/2-advanced-analysis/plot_conformal_predictive_distribution.py +++ 
b/examples/regression/2-advanced-analysis/plot_conformal_predictive_distribution.py @@ -24,11 +24,10 @@ from sklearn.linear_model import LinearRegression from sklearn.model_selection import train_test_split -from mapie.conformity_scores import (AbsoluteConformityScore, - ResidualNormalisedScore) +from mapie.conformity_scores import AbsoluteConformityScore, ResidualNormalisedScore from mapie.regression import MapieRegressor -warnings.filterwarnings('ignore') +warnings.filterwarnings("ignore") random_state = 15 @@ -39,12 +38,8 @@ # # Here, we propose just to generate data for regression task, then split it. -X, y = make_regression( - n_samples=1000, n_features=1, noise=20, random_state=random_state -) -X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.5, random_state=random_state -) +X, y = make_regression(n_samples=1000, n_features=1, noise=20, random_state=random_state) +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=random_state) plt.xlabel("x") plt.ylabel("y") @@ -61,11 +56,12 @@ # :class:`~mapie.regression.MapieRegressor` to add a new method named # `get_cumulative_distribution_function`. -class MapieConformalPredictiveDistribution(MapieRegressor): +class MapieConformalPredictiveDistribution(MapieRegressor): def __init__(self, **kwargs) -> None: super().__init__(**kwargs) - self.conformity_score.sym = False + if self.conformity_score: + self.conformity_score.sym = False def get_cumulative_distribution_function(self, X): y_pred = self.predict(X) @@ -85,8 +81,8 @@ def get_cumulative_distribution_function(self, X): mapie_regressor_1 = MapieConformalPredictiveDistribution( estimator=LinearRegression(), conformity_score=AbsoluteConformityScore(), - cv='split', - random_state=random_state + cv="split", + random_state=random_state, ) mapie_regressor_1.fit(X_train, y_train) @@ -96,8 +92,8 @@ def get_cumulative_distribution_function(self, X): mapie_regressor_2 = MapieConformalPredictiveDistribution( estimator=LinearRegression(), conformity_score=ResidualNormalisedScore(), - cv='split', - random_state=random_state + cv="split", + random_state=random_state, ) mapie_regressor_2.fit(X_train, y_train) @@ -124,7 +120,7 @@ def get_cumulative_distribution_function(self, X): def plot_cdf(data, bins, **kwargs): counts, bins = np.histogram(data, bins=bins) - cdf = np.cumsum(counts)/np.sum(counts) + cdf = np.cumsum(counts) / np.sum(counts) plt.plot( np.vstack((bins, np.roll(bins, -1))).T.flatten()[:-2], @@ -133,14 +129,8 @@ def plot_cdf(data, bins, **kwargs): ) -plot_cdf( - y_cdf_1[0], bins=nb_bins, label='Absolute Residual Score', alpha=0.8 -) -plot_cdf( - y_cdf_2[0], bins=nb_bins, label='Normalized Residual Score', alpha=0.8 -) -plt.vlines( - y_pred_1[0], 0, 1, label='Prediction', color="C2", linestyles='dashed' -) +plot_cdf(y_cdf_1[0], bins=nb_bins, label="Absolute Residual Score", alpha=0.8) +plot_cdf(y_cdf_2[0], bins=nb_bins, label="Normalized Residual Score", alpha=0.8) +plt.vlines(y_pred_1[0], 0, 1, label="Prediction", color="C2", linestyles="dashed") plt.legend(loc=2) plt.show() diff --git a/examples/regression/2-advanced-analysis/plot_nested-cv.py b/examples/regression/2-advanced-analysis/plot_nested-cv.py index 3f0eaee5..2d32c313 100644 --- a/examples/regression/2-advanced-analysis/plot_nested-cv.py +++ b/examples/regression/2-advanced-analysis/plot_nested-cv.py @@ -56,7 +56,7 @@ # Load the Boston data data_url = "http://lib.stat.cmu.edu/datasets/boston" -raw_df = pd.read_csv(data_url, sep=r'\s+', skiprows=22, header=None) +raw_df = 
pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None) X_boston = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]) y_boston = raw_df.values[1::2, 2] @@ -90,13 +90,10 @@ cv_obj.fit(X_train, y_train) best_est = cv_obj.best_estimator_ mapie_non_nested = MapieRegressor( - best_est, method="plus", cv=cv, agg_function="median", n_jobs=-1, - random_state=random_state + best_est, method="plus", cv=cv, agg_function="median", n_jobs=-1, random_state=random_state ) mapie_non_nested.fit(X_train, y_train) -y_pred_non_nested, y_pis_non_nested = mapie_non_nested.predict( - X_test, alpha=alpha -) +y_pred_non_nested, y_pis_non_nested = mapie_non_nested.predict(X_test, alpha=alpha) widths_non_nested = y_pis_non_nested[:, 1, 0] - y_pis_non_nested[:, 0, 0] coverage_non_nested = regression_coverage_score( y_test, y_pis_non_nested[:, 0, 0], y_pis_non_nested[:, 1, 0] @@ -116,29 +113,22 @@ n_jobs=-1, ) mapie_nested = MapieRegressor( - cv_obj, method="plus", cv=cv, agg_function="median", - random_state=random_state + cv_obj, method="plus", cv=cv, agg_function="median", random_state=random_state ) mapie_nested.fit(X_train, y_train) y_pred_nested, y_pis_nested = mapie_nested.predict(X_test, alpha=alpha) widths_nested = y_pis_nested[:, 1, 0] - y_pis_nested[:, 0, 0] -coverage_nested = regression_coverage_score( - y_test, y_pis_nested[:, 0, 0], y_pis_nested[:, 1, 0] -) +coverage_nested = regression_coverage_score(y_test, y_pis_nested[:, 0, 0], y_pis_nested[:, 1, 0]) score_nested = mean_squared_error(y_test, y_pred_nested, squared=False) # Print scores and effective coverages. -print( - "Scores and effective coverages for the CV+ strategy using the " - "Random Forest model." -) +print("Scores and effective coverages for the CV+ strategy using the " "Random Forest model.") print( "Score on the test set for the non-nested and nested CV approaches: ", f"{score_non_nested: .3f}, {score_nested: .3f}", ) print( - "Effective coverage on the test set for the non-nested " - "and nested CV approaches: ", + "Effective coverage on the test set for the non-nested " "and nested CV approaches: ", f"{coverage_non_nested: .3f}, {coverage_nested: .3f}", ) @@ -153,9 +143,7 @@ ax1.scatter(widths_nested, widths_non_nested) ax1.plot([min_x, max_x], [min_x, max_x], ls="--", color="k") ax2.axvline(x=0, color="r", lw=2) -ax2.set_xlabel( - "[width(non-nested CV) - width(nested CV)] / width(non-nested CV)" -) +ax2.set_xlabel("[width(non-nested CV) - width(nested CV)] / width(non-nested CV)") ax2.set_ylabel("Counts") ax2.hist( (widths_non_nested - widths_nested) / widths_non_nested, diff --git a/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py b/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py index 2b28a048..5a39d485 100644 --- a/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py +++ b/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py @@ -36,8 +36,7 @@ from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit from mapie._typing import NDArray -from mapie.metrics import (regression_coverage_score, - regression_mean_width_score) +from mapie.metrics import regression_coverage_score, regression_mean_width_score from mapie.subsample import BlockBootstrap from mapie.regression import MapieTimeSeriesRegressor @@ -67,9 +66,7 @@ f"Lag_{hour}" for hour in range(1, n_lags) ] -X_train = demand_train.loc[ - ~np.any(demand_train[features].isnull(), axis=1), features -] +X_train = demand_train.loc[~np.any(demand_train[features].isnull(), axis=1), features] y_train = 
demand_train.loc[X_train.index, "Demand"] X_test = demand_test.loc[:, features] y_test = demand_test["Demand"] @@ -97,9 +94,7 @@ model = cv_obj.best_estimator_ else: # Model: Random Forest previously optimized with a cross-validation - model = RandomForestRegressor( - max_depth=10, n_estimators=50, random_state=59 - ) + model = RandomForestRegressor(max_depth=10, n_estimators=50, random_state=59) # Estimate prediction intervals on test set with best estimator alpha = 0.05 @@ -137,20 +132,18 @@ ( y_pred_pfit_enbpi[:step_size], y_pis_pfit_enbpi[:step_size, :, :], -) = mapie_enpbi.predict( - X_test.iloc[:step_size, :], alpha=alpha, ensemble=True, optimize_beta=True -) +) = mapie_enpbi.predict(X_test.iloc[:step_size, :], alpha=alpha, ensemble=True, optimize_beta=True) for step in range(step_size, len(X_test), step_size): mapie_enpbi.partial_fit( - X_test.iloc[(step - step_size):step, :], - y_test.iloc[(step - step_size):step], + X_test.iloc[(step - step_size) : step, :], + y_test.iloc[(step - step_size) : step], ) ( - y_pred_pfit_enbpi[step:step + step_size], - y_pis_pfit_enbpi[step:step + step_size, :, :], + y_pred_pfit_enbpi[step : step + step_size], + y_pis_pfit_enbpi[step : step + step_size, :, :], ) = mapie_enpbi.predict( - X_test.iloc[step:(step + step_size), :], + X_test.iloc[step : (step + step_size), :], alpha=alpha, ensemble=True, optimize_beta=True, @@ -191,9 +184,7 @@ results = [enbpi_no_pfit, enbpi_pfit] # Plot estimated prediction intervals on test set -fig, axs = plt.subplots( - nrows=2, ncols=1, figsize=(15, 12), sharex="col" -) +fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(15, 12), sharex="col") for i, (ax, w, result) in enumerate( zip(axs, ["EnbPI, without partial_fit", "EnbPI with partial_fit"], results) @@ -221,13 +212,12 @@ ) ax.set_title( - w + "\n" - f"Coverage:{result['coverage']:.3f} Width:{result['width']:.3f}", + w + "\n" f"Coverage:{result['coverage']:.3f} Width:{result['width']:.3f}", fontweight="bold", - size=20 + size=20, ) plt.xticks(size=15, rotation=45) plt.yticks(size=15) -axs[0].legend(prop={'size': 22}) +axs[0].legend(prop={"size": 22}) plt.show() diff --git a/examples/regression/3-scientific-articles/README.rst b/examples/regression/3-scientific-articles/README.rst index e53e2c12..0d1a9d54 100644 --- a/examples/regression/3-scientific-articles/README.rst +++ b/examples/regression/3-scientific-articles/README.rst @@ -5,4 +5,4 @@ The following examples reproduce the simulations from the scientific articles that introduce the methods implemented -in MAPIE for regression settings. \ No newline at end of file +in MAPIE for regression settings. 
diff --git a/examples/regression/3-scientific-articles/plot_barber2020_simulations.py b/examples/regression/3-scientific-articles/plot_barber2020_simulations.py index 010be9b8..f5d19740 100644 --- a/examples/regression/3-scientific-articles/plot_barber2020_simulations.py +++ b/examples/regression/3-scientific-articles/plot_barber2020_simulations.py @@ -35,8 +35,7 @@ from sklearn.linear_model import LinearRegression from mapie._typing import NDArray -from mapie.metrics import (regression_coverage_score, - regression_mean_width_score) +from mapie.metrics import regression_coverage_score, regression_mean_width_score from mapie.regression import MapieRegressor @@ -111,27 +110,18 @@ def PIs_vs_dimensions( for strategy, params in strategies.items(): mapie = MapieRegressor( - LinearRegression(), - agg_function="median", - n_jobs=-1, - **params + LinearRegression(), agg_function="median", n_jobs=-1, **params ) mapie.fit(X_train, y_train) _, y_pis = mapie.predict(X_test, alpha=alpha) - coverage = regression_coverage_score( - y_test, y_pis[:, 0, 0], y_pis[:, 1, 0] - ) + coverage = regression_coverage_score(y_test, y_pis[:, 0, 0], y_pis[:, 1, 0]) results[strategy][dimension]["coverage"][trial] = coverage - width_mean = regression_mean_width_score( - y_pis[:, 0, 0], y_pis[:, 1, 0] - ) + width_mean = regression_mean_width_score(y_pis[:, 0, 0], y_pis[:, 1, 0]) results[strategy][dimension]["width_mean"][trial] = width_mean return results -def plot_simulation_results( - results: Dict[str, Dict[int, Dict[str, NDArray]]], title: str -) -> None: +def plot_simulation_results(results: Dict[str, Dict[int, Dict[str, NDArray]]], title: str) -> None: """ Show the prediction interval coverages and widths as a function of dimension values for selected strategies with standard error diff --git a/examples/regression/3-scientific-articles/plot_kim2020_simulations.py b/examples/regression/3-scientific-articles/plot_kim2020_simulations.py index a0ecaf1e..81e0d220 100644 --- a/examples/regression/3-scientific-articles/plot_kim2020_simulations.py +++ b/examples/regression/3-scientific-articles/plot_kim2020_simulations.py @@ -44,8 +44,7 @@ from sklearn.model_selection import train_test_split from mapie._typing import ArrayLike, NDArray -from mapie.metrics import (regression_coverage_score, - regression_mean_width_score) +from mapie.metrics import regression_coverage_score, regression_mean_width_score from mapie.regression import MapieRegressor from mapie.subsample import Subsample @@ -204,21 +203,13 @@ def get_coverage_width(PIs: pd.DataFrame, y: NDArray) -> Tuple[float, float]: (coverage, width) : Tuple[float, float] The mean coverage and width of the PIs. """ - coverage = regression_coverage_score( - y_true=y, y_pred_low=PIs["lower"], y_pred_up=PIs["upper"] - ) - width = regression_mean_width_score( - y_pred_low=PIs["lower"], y_pred_up=PIs["upper"] - ) + coverage = regression_coverage_score(y_true=y, y_pred_low=PIs["lower"], y_pred_up=PIs["upper"]) + width = regression_mean_width_score(y_pred_low=PIs["lower"], y_pred_up=PIs["upper"]) return (coverage, width) def B_random_from_B_fixed( - B: int, - train_size: int, - m: int, - itrial: int = 0, - random_state: int = 98765 + B: int, train_size: int, m: int, itrial: int = 0, random_state: int = 98765 ) -> int: """ Generates a random number from a binomial distribution. 
@@ -310,9 +301,7 @@ def comparison_JAB( ) (X, y) = get_X_y() - m_vals = np.round( - train_size * np.linspace(0.1, 1, num=boostrap_size) - ).astype(int) + m_vals = np.round(train_size * np.linspace(0.1, 1, num=boostrap_size)).astype(int) result_index = 0 for itrial in range(trials): @@ -342,9 +331,7 @@ def comparison_JAB( for i_m, m in enumerate(m_vals): # J+aB, random B - B_random = B_random_from_B_fixed( - B_fixed, train_size, m, itrial=i_m - ) + B_random = B_random_from_B_fixed(B_fixed, train_size, m, itrial=i_m) subsample_B_random = Subsample( n_resamplings=B_random, n_samples=m, diff --git a/examples/regression/4-tutorials/README.rst b/examples/regression/4-tutorials/README.rst index 65af0e28..f7e511d3 100644 --- a/examples/regression/4-tutorials/README.rst +++ b/examples/regression/4-tutorials/README.rst @@ -3,4 +3,4 @@ 4. Tutorials ------------ -The following examples present pedagogical tutorials explaining how to use MAPIE on different regression taks. \ No newline at end of file +The following examples present pedagogical tutorials explaining how to use MAPIE on different regression taks. diff --git a/examples/regression/4-tutorials/plot_cqr_tutorial.py b/examples/regression/4-tutorials/plot_cqr_tutorial.py index f370fa78..5f076f82 100644 --- a/examples/regression/4-tutorials/plot_cqr_tutorial.py +++ b/examples/regression/4-tutorials/plot_cqr_tutorial.py @@ -31,6 +31,8 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is import warnings +from typing import Dict + import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -41,8 +43,7 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is from sklearn.datasets import fetch_california_housing from sklearn.model_selection import KFold, RandomizedSearchCV, train_test_split -from mapie.metrics import (regression_coverage_score, - regression_mean_width_score) +from mapie.metrics import regression_coverage_score, regression_mean_width_score from mapie.regression import MapieQuantileRegressor, MapieRegressor from mapie.subsample import Subsample @@ -74,8 +75,8 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is df = pd.concat([X, y], axis=1) -pear_corr = df.corr(method='pearson') -pear_corr.style.background_gradient(cmap='Greens', axis=0) +pear_corr = df.corr(method="pearson") +pear_corr.style.background_gradient(cmap="Greens", axis=0) ############################################################################## @@ -86,7 +87,7 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is axs.hist(y, bins=50) axs.set_xlabel("Median price of houses") axs.set_title("Histogram of house prices") -axs.xaxis.set_major_formatter(FormatStrFormatter('%.0f' + "k")) +axs.xaxis.set_major_formatter(FormatStrFormatter("%.0f" + "k")) plt.show() @@ -96,16 +97,8 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is # calibrating the prediction intervals. 
-X_train, X_test, y_train, y_test = train_test_split( - X, - y['MedHouseVal'], - random_state=random_state -) -X_train, X_calib, y_train, y_calib = train_test_split( - X_train, - y_train, - random_state=random_state -) +X_train, X_test, y_train, y_test = train_test_split(X, y["MedHouseVal"], random_state=random_state) +X_train, X_calib, y_train, y_calib = train_test_split(X_train, y_train, random_state=random_state) ############################################################################## @@ -118,16 +111,12 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is # to find the optimal model to predict the house prices. -estimator = LGBMRegressor( - objective='quantile', - alpha=0.5, - random_state=random_state -) +estimator = LGBMRegressor(objective="quantile", alpha=0.5, random_state=random_state) params_distributions = dict( num_leaves=randint(low=10, high=50), max_depth=randint(low=3, high=20), n_estimators=randint(low=50, high=100), - learning_rate=uniform() + learning_rate=uniform(), ) optim_model = RandomizedSearchCV( estimator, @@ -136,7 +125,7 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is n_iter=10, cv=KFold(n_splits=5, shuffle=True), verbose=0, - random_state=random_state + random_state=random_state, ) optim_model.fit(X_train, y_train) estimator = optim_model.best_estimator_ @@ -177,43 +166,51 @@ def plot_prediction_intervals( upper_bound, coverage, width, - num_plots_idx + num_plots_idx, ): """ Plot of the prediction intervals for each different conformal method. """ - axs.yaxis.set_major_formatter(FormatStrFormatter('%.0f' + "k")) - axs.xaxis.set_major_formatter(FormatStrFormatter('%.0f' + "k")) + axs.yaxis.set_major_formatter(FormatStrFormatter("%.0f" + "k")) + axs.xaxis.set_major_formatter(FormatStrFormatter("%.0f" + "k")) lower_bound_ = np.take(lower_bound, num_plots_idx) y_pred_sorted_ = np.take(y_pred_sorted, num_plots_idx) y_test_sorted_ = np.take(y_test_sorted, num_plots_idx) - error = y_pred_sorted_-lower_bound_ + error = y_pred_sorted_ - lower_bound_ - warning1 = y_test_sorted_ > y_pred_sorted_+error - warning2 = y_test_sorted_ < y_pred_sorted_-error + warning1 = y_test_sorted_ > y_pred_sorted_ + error + warning2 = y_test_sorted_ < y_pred_sorted_ - error warnings = warning1 + warning2 axs.errorbar( y_test_sorted_[~warnings], y_pred_sorted_[~warnings], yerr=np.abs(error[~warnings]), - capsize=5, marker="o", elinewidth=2, linewidth=0, - label="Inside prediction interval" - ) + capsize=5, + marker="o", + elinewidth=2, + linewidth=0, + label="Inside prediction interval", + ) axs.errorbar( y_test_sorted_[warnings], y_pred_sorted_[warnings], yerr=np.abs(error[warnings]), - capsize=5, marker="o", elinewidth=2, linewidth=0, color="red", - label="Outside prediction interval" - ) + capsize=5, + marker="o", + elinewidth=2, + linewidth=0, + color="red", + label="Outside prediction interval", + ) axs.scatter( y_test_sorted_[warnings], y_test_sorted_[warnings], - marker="*", color="green", - label="True value" + marker="*", + color="green", + label="True value", ) axs.set_xlabel("True house prices in $") axs.set_ylabel("Prediction of house prices in $") @@ -222,15 +219,15 @@ def plot_prediction_intervals( f"Coverage: {np.round(coverage, round_to)}\n" + f"Interval width: {np.round(width, round_to)}" ), - xy=(np.min(y_test_sorted_)*3, np.max(y_pred_sorted_+error)*0.95), - ) + xy=(np.min(y_test_sorted_) * 3, np.max(y_pred_sorted_ + error) * 0.95), + ) lims = [ np.min([axs.get_xlim(), axs.get_ylim()]), # min of both axes 
np.max([axs.get_xlim(), axs.get_ylim()]), # max of both axes ] - axs.plot(lims, lims, '--', alpha=0.75, color="black", label="x=y") + axs.plot(lims, lims, "--", alpha=0.75, color="black", label="x=y") axs.add_artist(ab) - axs.set_title(title, fontweight='bold') + axs.set_title(title, fontweight="bold") ############################################################################## @@ -254,7 +251,7 @@ def plot_prediction_intervals( # (``quantile_estimator_params``) and that we will use symmetrical residuals. -STRATEGIES = { +STRATEGIES: Dict[str, dict] = { "naive": {"method": "naive"}, "cv_plus": {"method": "plus", "cv": 10}, "jackknife_plus_ab": {"method": "plus", "cv": Subsample(n_resamplings=50)}, @@ -266,11 +263,7 @@ def plot_prediction_intervals( for strategy, params in STRATEGIES.items(): if strategy == "cqr": mapie = MapieQuantileRegressor(estimator, **params) - mapie.fit( - X_train, y_train, - X_calib=X_calib, y_calib=y_calib, - random_state=random_state - ) + mapie.fit(X_train, y_train, X_calib=X_calib, y_calib=y_calib, random_state=random_state) y_pred[strategy], y_pis[strategy] = mapie.predict(X_test) else: mapie = MapieRegressor(estimator, **params, random_state=random_state) @@ -280,17 +273,14 @@ def plot_prediction_intervals( y_test_sorted[strategy], y_pred_sorted[strategy], lower_bound[strategy], - upper_bound[strategy] + upper_bound[strategy], ) = sort_y_values(y_test, y_pred[strategy], y_pis[strategy]) coverage[strategy] = regression_coverage_score( - y_test, - y_pis[strategy][:, 0, 0], - y_pis[strategy][:, 1, 0] - ) + y_test, y_pis[strategy][:, 0, 0], y_pis[strategy][:, 1, 0] + ) width[strategy] = regression_mean_width_score( - y_pis[strategy][:, 0, 0], - y_pis[strategy][:, 1, 0] - ) + y_pis[strategy][:, 0, 0], y_pis[strategy][:, 1, 0] + ) ############################################################################## @@ -299,9 +289,7 @@ def plot_prediction_intervals( perc_obs_plot = 0.02 -num_plots = rng.choice( - len(y_test), int(perc_obs_plot*len(y_test)), replace=False - ) +num_plots = rng.choice(len(y_test), int(perc_obs_plot * len(y_test)), replace=False) fig, axs = plt.subplots(2, 2, figsize=(15, 13)) coords = [axs[0, 0], axs[0, 1], axs[1, 0], axs[1, 1]] for strategy, coord in zip(STRATEGIES.keys(), coords): @@ -314,17 +302,20 @@ def plot_prediction_intervals( upper_bound[strategy], coverage[strategy], width[strategy], - num_plots - ) + num_plots, + ) lines_labels = [ax.get_legend_handles_labels() for ax in fig.axes] +lines: list +labels: list lines, labels = [sum(_, []) for _ in zip(*lines_labels)] plt.legend( - lines[:4], labels[:4], - loc='upper center', + lines[:4], + labels[:4], + loc="upper center", bbox_to_anchor=(0, -0.15), fancybox=True, shadow=True, - ncol=2 + ncol=2, ) plt.show() @@ -336,15 +327,7 @@ def plot_prediction_intervals( # are increased with the increase in price. 
-def get_coverages_widths_by_bins( - want, - y_test, - y_pred, - lower_bound, - upper_bound, - STRATEGIES, - bins -): +def get_coverages_widths_by_bins(want, y_test, y_pred, lower_bound, upper_bound, STRATEGIES, bins): """ Given the results from MAPIE, this function split the data according the the test values into bins and calculates coverage @@ -355,42 +338,30 @@ def get_coverages_widths_by_bins( for item in cuts_: cuts.append(item.left) cuts.append(cuts_[-1].right) - cuts.append(np.max(y_test["naive"])+1) + cuts.append(np.max(y_test["naive"]) + 1) recap = {} for i in range(len(cuts) - 1): - cut1, cut2 = cuts[i], cuts[i+1] + cut1, cut2 = cuts[i], cuts[i + 1] name = f"[{np.round(cut1, 0)}, {np.round(cut2, 0)}]" recap[name] = [] for strategy in STRATEGIES: - indices = np.where( - (y_test[strategy] > cut1) * (y_test[strategy] <= cut2) - ) + indices = np.where((y_test[strategy] > cut1) * (y_test[strategy] <= cut2)) y_test_trunc = np.take(y_test[strategy], indices) y_low_ = np.take(lower_bound[strategy], indices) y_high_ = np.take(upper_bound[strategy], indices) if want == "coverage": - recap[name].append(regression_coverage_score( - y_test_trunc[0], - y_low_[0], - y_high_[0] - )) - elif want == "width": recap[name].append( - regression_mean_width_score(y_low_[0], y_high_[0]) + regression_coverage_score(y_test_trunc[0], y_low_[0], y_high_[0]) ) + elif want == "width": + recap[name].append(regression_mean_width_score(y_low_[0], y_high_[0])) recap_df = pd.DataFrame(recap, index=STRATEGIES) return recap_df bins = list(np.arange(0, 1, 0.1)) binned_data = get_coverages_widths_by_bins( - "coverage", - y_test_sorted, - y_pred_sorted, - lower_bound, - upper_bound, - STRATEGIES, - bins + "coverage", y_test_sorted, y_pred_sorted, lower_bound, upper_bound, STRATEGIES, bins ) @@ -421,13 +392,7 @@ def get_coverages_widths_by_bins( binned_data = get_coverages_widths_by_bins( - "width", - y_test_sorted, - y_pred_sorted, - lower_bound, - upper_bound, - STRATEGIES, - bins + "width", y_test_sorted, y_pred_sorted, lower_bound, upper_bound, STRATEGIES, bins ) diff --git a/examples/regression/4-tutorials/plot_main-tutorial-regression.py b/examples/regression/4-tutorials/plot_main-tutorial-regression.py index 50c2fd48..8455344b 100644 --- a/examples/regression/4-tutorials/plot_main-tutorial-regression.py +++ b/examples/regression/4-tutorials/plot_main-tutorial-regression.py @@ -31,6 +31,8 @@ import os import warnings +from typing import Dict + import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -57,7 +59,7 @@ def x_sinx(x): """One-dimensional x*sin(x) function.""" - return x*np.sin(x) + return x * np.sin(x) def get_1d_data_with_constant_noise(funct, min_x, max_x, n_samples, noise): @@ -68,13 +70,11 @@ def get_1d_data_with_constant_noise(funct, min_x, max_x, n_samples, noise): np.random.seed(59) X_train = np.linspace(min_x, max_x, n_samples) np.random.shuffle(X_train) - X_test = np.linspace(min_x, max_x, n_samples*5) + X_test = np.linspace(min_x, max_x, n_samples * 5) y_train, y_mesh, y_test = funct(X_train), funct(X_test), funct(X_test) y_train += np.random.normal(0, noise, y_train.shape[0]) y_test += np.random.normal(0, noise, y_test.shape[0]) - return ( - X_train.reshape(-1, 1), y_train, X_test.reshape(-1, 1), y_test, y_mesh - ) + return (X_train.reshape(-1, 1), y_train, X_test.reshape(-1, 1), y_test, y_mesh) ############################################################################## @@ -104,18 +104,18 @@ def get_1d_data_with_constant_noise(funct, min_x, max_x, n_samples, noise): 
degree_polyn = 10 polyn_model = Pipeline( - [ - ("poly", PolynomialFeatures(degree=degree_polyn)), - ("linear", LinearRegression()) - ] + [("poly", PolynomialFeatures(degree=degree_polyn)), ("linear", LinearRegression())] ) polyn_model_quant = Pipeline( [ ("poly", PolynomialFeatures(degree=degree_polyn)), - ("linear", QuantileRegressor( + ( + "linear", + QuantileRegressor( solver="highs", alpha=0, - )) + ), + ), ] ) @@ -126,7 +126,7 @@ def get_1d_data_with_constant_noise(funct, min_x, max_x, n_samples, noise): # are then saved in a DataFrame. Here, we set an alpha value of 0.05 # in order to obtain a 95% confidence for our prediction intervals. -STRATEGIES = { +STRATEGIES: Dict[str, dict] = { "naive": dict(method="naive"), "jackknife": dict(method="base", cv=-1), "jackknife_plus": dict(method="plus", cv=-1), @@ -135,12 +135,8 @@ def get_1d_data_with_constant_noise(funct, min_x, max_x, n_samples, noise): "cv_plus": dict(method="plus", cv=10), "cv_minmax": dict(method="minmax", cv=10), "jackknife_plus_ab": dict(method="plus", cv=Subsample(n_resamplings=50)), - "jackknife_minmax_ab": dict( - method="minmax", cv=Subsample(n_resamplings=50) - ), - "conformalized_quantile_regression": dict( - method="quantile", cv="split", alpha=0.05 - ) + "jackknife_minmax_ab": dict(method="minmax", cv=Subsample(n_resamplings=50)), + "conformalized_quantile_regression": dict(method="quantile", cv="split", alpha=0.05), } y_pred, y_pis = {}, {} for strategy, params in STRATEGIES.items(): @@ -166,16 +162,7 @@ def get_1d_data_with_constant_noise(funct, min_x, max_x, n_samples, noise): def plot_1d_data( - X_train, - y_train, - X_test, - y_test, - y_sigma, - y_pred, - y_pred_low, - y_pred_up, - ax=None, - title=None + X_train, y_train, X_test, y_test, y_sigma, y_pred, y_pred_low, y_pred_up, ax=None, title=None ): ax.set_xlabel("x") ax.set_ylabel("y") @@ -184,9 +171,7 @@ def plot_1d_data( ax.plot(X_test, y_test, color="gray", label="True confidence intervals") ax.plot(X_test, y_test - y_sigma, color="gray", ls="--") ax.plot(X_test, y_test + y_sigma, color="gray", ls="--") - ax.plot( - X_test, y_pred, color="blue", alpha=0.5, label="Prediction intervals" - ) + ax.plot(X_test, y_pred, color="blue", alpha=0.5, label="Prediction intervals") if title is not None: ax.set_title(title) ax.legend() @@ -198,7 +183,7 @@ def plot_1d_data( "cv_plus", "cv_minmax", "jackknife_plus_ab", - "conformalized_quantile_regression" + "conformalized_quantile_regression", ] n_figs = len(strategies) fig, axs = plt.subplots(3, 2, figsize=(9, 13)) @@ -209,12 +194,12 @@ def plot_1d_data( y_train.ravel(), X_test.ravel(), y_mesh.ravel(), - np.full((X_test.shape[0]), 1.96*noise).ravel(), + np.full((X_test.shape[0]), 1.96 * noise).ravel(), y_pred[strategy].ravel(), y_pis[strategy][:, 0, 0].ravel(), y_pis[strategy][:, 1, 0].ravel(), ax=coord, - title=strategy + title=strategy, ) plt.show() @@ -226,13 +211,9 @@ def plot_1d_data( fig, ax = plt.subplots(1, 1, figsize=(9, 5)) -ax.axhline(1.96*2*noise, ls="--", color="k", label="True width") +ax.axhline(1.96 * 2 * noise, ls="--", color="k", label="True width") for strategy in STRATEGIES: - ax.plot( - X_test, - y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0], - label=strategy - ) + ax.plot(X_test, y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0], label=strategy) ax.set_xlabel("x") ax.set_ylabel("Prediction Interval Width") ax.legend(fontsize=8) @@ -259,16 +240,17 @@ def plot_1d_data( # the different strategies. 
-pd.DataFrame([ +pd.DataFrame( [ - regression_coverage_score( - y_test, y_pis[strategy][:, 0, 0], y_pis[strategy][:, 1, 0] - ), - ( - y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0] - ).mean() - ] for strategy in STRATEGIES -], index=STRATEGIES, columns=["Coverage", "Width average"]).round(2) + [ + regression_coverage_score(y_test, y_pis[strategy][:, 0, 0], y_pis[strategy][:, 1, 0]), + (y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0]).mean(), + ] + for strategy in STRATEGIES + ], + index=STRATEGIES, + columns=["Coverage", "Width average"], +).round(2) ############################################################################## @@ -285,9 +267,8 @@ def plot_1d_data( # function that generates one-dimensional data with normal noise uniformely # in a given interval. -def get_1d_data_with_heteroscedastic_noise( - funct, min_x, max_x, n_samples, noise -): + +def get_1d_data_with_heteroscedastic_noise(funct, min_x, max_x, n_samples, noise): """ Generate 1D noisy data uniformely from the given function and standard deviation for the noise. @@ -295,19 +276,11 @@ def get_1d_data_with_heteroscedastic_noise( np.random.seed(59) X_train = np.linspace(min_x, max_x, n_samples) np.random.shuffle(X_train) - X_test = np.linspace(min_x, max_x, n_samples*5) - y_train = ( - funct(X_train) + - (np.random.normal(0, noise, len(X_train)) * X_train) - ) - y_test = ( - funct(X_test) + - (np.random.normal(0, noise, len(X_test)) * X_test) - ) + X_test = np.linspace(min_x, max_x, n_samples * 5) + y_train = funct(X_train) + (np.random.normal(0, noise, len(X_train)) * X_train) + y_test = funct(X_test) + (np.random.normal(0, noise, len(X_test)) * X_test) y_mesh = funct(X_test) - return ( - X_train.reshape(-1, 1), y_train, X_test.reshape(-1, 1), y_test, y_mesh - ) + return (X_train.reshape(-1, 1), y_train, X_test.reshape(-1, 1), y_test, y_mesh) ############################################################################## @@ -317,9 +290,7 @@ def get_1d_data_with_heteroscedastic_noise( min_x, max_x, n_samples, noise = 0, 5, 300, 0.5 -( - X_train, y_train, X_test, y_test, y_mesh -) = get_1d_data_with_heteroscedastic_noise( +(X_train, y_train, X_test, y_test, y_mesh) = get_1d_data_with_heteroscedastic_noise( x_sinx, min_x, max_x, n_samples, noise ) @@ -341,18 +312,18 @@ def get_1d_data_with_heteroscedastic_noise( degree_polyn = 10 polyn_model = Pipeline( - [ - ("poly", PolynomialFeatures(degree=degree_polyn)), - ("linear", LinearRegression()) - ] + [("poly", PolynomialFeatures(degree=degree_polyn)), ("linear", LinearRegression())] ) polyn_model_quant = Pipeline( [ ("poly", PolynomialFeatures(degree=degree_polyn)), - ("linear", QuantileRegressor( + ( + "linear", + QuantileRegressor( solver="highs", alpha=0, - )) + ), + ), ] ) @@ -372,9 +343,7 @@ def get_1d_data_with_heteroscedastic_noise( "cv_plus": dict(method="plus", cv=10), "cv_minmax": dict(method="minmax", cv=10), "jackknife_plus_ab": dict(method="plus", cv=Subsample(n_resamplings=50)), - "conformalized_quantile_regression": dict( - method="quantile", cv="split", alpha=0.05 - ) + "conformalized_quantile_regression": dict(method="quantile", cv="split", alpha=0.05), } y_pred, y_pis = {}, {} for strategy, params in STRATEGIES.items(): @@ -399,7 +368,7 @@ def get_1d_data_with_heteroscedastic_noise( "cv_plus", "cv_minmax", "jackknife_plus_ab", - "conformalized_quantile_regression" + "conformalized_quantile_regression", ] n_figs = len(strategies) fig, axs = plt.subplots(3, 2, figsize=(9, 13)) @@ -410,12 +379,12 @@ def get_1d_data_with_heteroscedastic_noise( 
y_train.ravel(), X_test.ravel(), y_mesh.ravel(), - (1.96*noise*X_test).ravel(), + (1.96 * noise * X_test).ravel(), y_pred[strategy].ravel(), y_pis[strategy][:, 0, 0].ravel(), y_pis[strategy][:, 1, 0].ravel(), ax=coord, - title=strategy + title=strategy, ) plt.show() @@ -426,13 +395,9 @@ def get_1d_data_with_heteroscedastic_noise( # prediction intervals to the local noise. fig, ax = plt.subplots(1, 1, figsize=(7, 5)) -ax.plot(X_test, 1.96*2*noise*X_test, ls="--", color="k", label="True width") +ax.plot(X_test, 1.96 * 2 * noise * X_test, ls="--", color="k", label="True width") for strategy in STRATEGIES: - ax.plot( - X_test, - y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0], - label=strategy - ) + ax.plot(X_test, y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0], label=strategy) ax.set_xlabel("x") ax.set_ylabel("Prediction Interval Width") ax.legend(fontsize=8) @@ -452,29 +417,26 @@ def get_1d_data_with_heteroscedastic_noise( # these methods, the conditional coverage is likely not guaranteed as we will # observe in the next figure. + def get_heteroscedastic_coverage(y_test, y_pis, STRATEGIES, bins): recap = {} - for i in range(len(bins)-1): - bin1, bin2 = bins[i], bins[i+1] + for i in range(len(bins) - 1): + bin1, bin2 = bins[i], bins[i + 1] name = f"[{bin1}, {bin2}]" recap[name] = [] for strategy in STRATEGIES: - indices = np.where((X_test >= bins[i]) * (X_test <= bins[i+1])) + indices = np.where((X_test >= bins[i]) * (X_test <= bins[i + 1])) y_test_trunc = np.take(y_test, indices) y_low_ = np.take(y_pis[strategy][:, 0, 0], indices) y_high_ = np.take(y_pis[strategy][:, 1, 0], indices) - score_coverage = regression_coverage_score( - y_test_trunc[0], y_low_[0], y_high_[0] - ) + score_coverage = regression_coverage_score(y_test_trunc[0], y_low_[0], y_high_[0]) recap[name].append(score_coverage) recap_df = pd.DataFrame(recap, index=STRATEGIES) return recap_df bins = [0, 1, 2, 3, 4, 5] -heteroscedastic_coverage = get_heteroscedastic_coverage( - y_test, y_pis, STRATEGIES, bins -) +heteroscedastic_coverage = get_heteroscedastic_coverage(y_test, y_pis, STRATEGIES, bins) # fig = plt.figure() heteroscedastic_coverage.T.plot.bar(figsize=(12, 5), alpha=0.7) @@ -492,16 +454,17 @@ def get_heteroscedastic_coverage(y_test, y_pis, STRATEGIES, bins): # points whose true values lie within the prediction intervals, given by # the different strategies. -pd.DataFrame([ +pd.DataFrame( [ - regression_coverage_score( - y_test, y_pis[strategy][:, 0, 0], y_pis[strategy][:, 1, 0] - ), - ( - y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0] - ).mean() - ] for strategy in STRATEGIES -], index=STRATEGIES, columns=["Coverage", "Width average"]).round(2) + [ + regression_coverage_score(y_test, y_pis[strategy][:, 0, 0], y_pis[strategy][:, 1, 0]), + (y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0]).mean(), + ] + for strategy in STRATEGIES + ], + index=STRATEGIES, + columns=["Coverage", "Width average"], +).round(2) ############################################################################## @@ -525,6 +488,7 @@ def get_heteroscedastic_coverage(y_test, y_pis, STRATEGIES, bins): # # Let's start by generating and showing the data. + def get_1d_data_with_normal_distrib(funct, mu, sigma, n_samples, noise): """ Generate noisy 1D data with normal distribution from given function @@ -532,16 +496,14 @@ def get_1d_data_with_normal_distrib(funct, mu, sigma, n_samples, noise): """ np.random.seed(59) X_train = np.random.normal(mu, sigma, n_samples) - X_test = np.arange(mu-4*sigma, mu+4*sigma, sigma/20.) 
+ X_test = np.arange(mu - 4 * sigma, mu + 4 * sigma, sigma / 20.0) y_train, y_mesh, y_test = funct(X_train), funct(X_test), funct(X_test) y_train += np.random.normal(0, noise, y_train.shape[0]) y_test += np.random.normal(0, noise, y_test.shape[0]) - return ( - X_train.reshape(-1, 1), y_train, X_test.reshape(-1, 1), y_test, y_mesh - ) + return (X_train.reshape(-1, 1), y_train, X_test.reshape(-1, 1), y_test, y_mesh) -mu, sigma, n_samples, noise = 0, 2, 1000, 0. +mu, sigma, n_samples, noise = 0, 2, 1000, 0.0 X_train, y_train, X_test, y_test, y_mesh = get_1d_data_with_normal_distrib( x_sinx, mu, sigma, n_samples, noise ) @@ -559,10 +521,13 @@ def get_1d_data_with_normal_distrib(funct, mu, sigma, n_samples, noise): polyn_model_quant = Pipeline( [ ("poly", PolynomialFeatures(degree=degree_polyn)), - ("linear", QuantileRegressor( + ( + "linear", + QuantileRegressor( solver="highs-ds", alpha=0, - )) + ), + ), ] ) STRATEGIES = { @@ -574,12 +539,8 @@ def get_1d_data_with_normal_distrib(funct, mu, sigma, n_samples, noise): "cv_plus": dict(method="plus", cv=10), "cv_minmax": dict(method="minmax", cv=10), "jackknife_plus_ab": dict(method="plus", cv=Subsample(n_resamplings=50)), - "jackknife_minmax_ab": dict( - method="minmax", cv=Subsample(n_resamplings=50) - ), - "conformalized_quantile_regression": dict( - method="quantile", cv="split", alpha=0.05 - ) + "jackknife_minmax_ab": dict(method="minmax", cv=Subsample(n_resamplings=50)), + "conformalized_quantile_regression": dict(method="quantile", cv="split", alpha=0.05), } y_pred, y_pis = {}, {} for strategy, params in STRATEGIES.items(): @@ -598,7 +559,7 @@ def get_1d_data_with_normal_distrib(funct, mu, sigma, n_samples, noise): "cv_plus", "cv_minmax", "jackknife_plus_ab", - "conformalized_quantile_regression" + "conformalized_quantile_regression", ] n_figs = len(strategies) fig, axs = plt.subplots(3, 2, figsize=(9, 13)) @@ -609,12 +570,12 @@ def get_1d_data_with_normal_distrib(funct, mu, sigma, n_samples, noise): y_train.ravel(), X_test.ravel(), y_mesh.ravel(), - 1.96*noise, + 1.96 * noise, y_pred[strategy].ravel(), y_pis[strategy][:, 0, :].ravel(), y_pis[strategy][:, 1, :].ravel(), ax=coord, - title=strategy + title=strategy, ) plt.show() @@ -631,11 +592,7 @@ def get_1d_data_with_normal_distrib(funct, mu, sigma, n_samples, noise): fig, ax = plt.subplots(1, 1, figsize=(7, 5)) ax.set_yscale("log") for strategy in STRATEGIES: - ax.plot( - X_test, - y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0], - label=strategy - ) + ax.plot(X_test, y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0], label=strategy) ax.set_xlabel("x") ax.set_ylabel("Prediction Interval Width") ax.legend(fontsize=8) @@ -655,16 +612,17 @@ def get_1d_data_with_normal_distrib(funct, mu, sigma, n_samples, noise): # quantile regressor with quantile :math:`\alpha/2`. Note that a warning will # be issued when this occurs. 
-pd.DataFrame([ +pd.DataFrame( [ - regression_coverage_score( - y_test, y_pis[strategy][:, 0, 0], y_pis[strategy][:, 1, 0] - ), - ( - y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0] - ).mean() - ] for strategy in STRATEGIES -], index=STRATEGIES, columns=["Coverage", "Width average"]).round(3) + [ + regression_coverage_score(y_test, y_pis[strategy][:, 0, 0], y_pis[strategy][:, 1, 0]), + (y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0]).mean(), + ] + for strategy in STRATEGIES + ], + index=STRATEGIES, + columns=["Coverage", "Width average"], +).round(3) ############################################################################## # In conclusion, the Jackknife-minmax, CV+, CV-minmax, or Jackknife-minmax-ab diff --git a/examples/regression/4-tutorials/plot_ResidualNormalisedScore_tutorial.py b/examples/regression/4-tutorials/plot_residual-normalised-score-tutorial.py similarity index 81% rename from examples/regression/4-tutorials/plot_ResidualNormalisedScore_tutorial.py rename to examples/regression/4-tutorials/plot_residual-normalised-score-tutorial.py index 9e2dc164..b8b282ec 100644 --- a/examples/regression/4-tutorials/plot_ResidualNormalisedScore_tutorial.py +++ b/examples/regression/4-tutorials/plot_residual-normalised-score-tutorial.py @@ -52,7 +52,7 @@ axs.hist(y, bins=50) axs.set_xlabel("Median price of houses") axs.set_title("Histogram of house prices") -axs.xaxis.set_major_formatter(FormatStrFormatter('%.0f' + "k")) +axs.xaxis.set_major_formatter(FormatStrFormatter("%.0f" + "k")) plt.show() @@ -66,21 +66,11 @@ np.array(X) np.array(y) X_train, X_test, y_train, y_test = train_test_split( - X, - y, - random_state=random_state, - test_size=0.02 -) -X_train, X_calib, y_train, y_calib = train_test_split( - X_train, - y_train, - random_state=random_state + X, y, random_state=random_state, test_size=0.02 ) +X_train, X_calib, y_train, y_calib = train_test_split(X_train, y_train, random_state=random_state) X_calib_prefit, X_res, y_calib_prefit, y_res = train_test_split( - X_calib, - y_calib, - random_state=random_state, - test_size=0.5 + X_calib, y_calib, random_state=random_state, test_size=0.5 ) @@ -101,14 +91,13 @@ # estimator a :class:`~sklearn.linear_model.LinearRegression` wrapped to avoid # negative values like it is done by default in the class. 
+ class PosEstim(LinearRegression): def __init__(self): super().__init__() def fit(self, X, y): - super().fit( - X, np.log(np.maximum(y, np.full(y.shape, np.float64(1e-8)))) - ) + super().fit(X, np.log(np.maximum(y, np.full(y.shape, np.float64(1e-8))))) return self def predict(self, X): @@ -120,10 +109,7 @@ def predict(self, X): base_model = base_model.fit(X_train, y_train) residual_estimator = RandomForestRegressor( - n_estimators=20, - max_leaf_nodes=70, - min_samples_leaf=7, - random_state=random_state + n_estimators=20, max_leaf_nodes=70, min_samples_leaf=7, random_state=random_state ) residual_estimator = residual_estimator.fit( X_res, np.abs(np.subtract(y_res, base_model.predict(X_res))) @@ -133,34 +119,25 @@ def predict(self, X): ) # Estimating prediction intervals STRATEGIES = { - "Default": { - "cv": "split", - "conformity_score": ResidualNormalisedScore() - }, + "Default": {"cv": "split", "conformity_score": ResidualNormalisedScore()}, "Base model prefit": { "cv": "prefit", "estimator": base_model, - "conformity_score": ResidualNormalisedScore( - split_size=0.5, random_state=random_state - ) + "conformity_score": ResidualNormalisedScore(split_size=0.5, random_state=random_state), }, "Base and residual model prefit": { "cv": "prefit", "estimator": base_model, "conformity_score": ResidualNormalisedScore( - residual_estimator=residual_estimator, - random_state=random_state, - prefit=True - ) + residual_estimator=residual_estimator, random_state=random_state, prefit=True + ), }, "Wrapped residual model": { "cv": "prefit", "estimator": base_model, "conformity_score": ResidualNormalisedScore( - residual_estimator=wrapped_residual_estimator, - random_state=random_state, - prefit=True - ) + residual_estimator=wrapped_residual_estimator, random_state=random_state, prefit=True + ), }, } @@ -169,18 +146,17 @@ def predict(self, X): alpha = 0.1 for strategy, params in STRATEGIES.items(): mapie = MapieRegressor(**params, random_state=random_state) - if mapie.conformity_score.prefit: + if ( + isinstance(mapie.conformity_score, ResidualNormalisedScore) + and mapie.conformity_score.prefit + ): mapie.fit(X_calib_prefit, y_calib_prefit) else: mapie.fit(X_calib, y_calib) y_pred[strategy], intervals[strategy] = mapie.predict(X_test, alpha=alpha) - coverage[strategy] = regression_coverage_score_v2( - y_test, intervals[strategy] - ) - cond_coverage[strategy] = regression_ssc_score( - y_test, intervals[strategy], num_bins=num_bins - ) + coverage[strategy] = regression_coverage_score_v2(y_test, intervals[strategy]) + cond_coverage[strategy] = regression_ssc_score(y_test, intervals[strategy], num_bins=num_bins) def yerr(y_pred, intervals) -> ArrayLike: @@ -199,13 +175,15 @@ def yerr(y_pred, intervals) -> ArrayLike: ArrayLike Error bars. 
""" - return np.abs(np.concatenate( - [ - np.expand_dims(y_pred, 0) - intervals[:, 0, 0].T, - intervals[:, 1, 0].T - np.expand_dims(y_pred, 0), - ], - axis=0, - )) + return np.abs( + np.concatenate( + [ + np.expand_dims(y_pred, 0) - intervals[:, 0, 0].T, + intervals[:, 1, 0].T - np.expand_dims(y_pred, 0), + ], + axis=0, + ) + ) def plot_predictions(y, y_pred, intervals, coverage, cond_coverage, ax=None): @@ -244,7 +222,7 @@ def plot_predictions(y, y_pred, intervals, coverage, cond_coverage, ax=None): color="g", alpha=0.2, linestyle="None", - label="Inside prediction interval" + label="Inside prediction interval", ) ax.errorbar( y[warnings], @@ -253,15 +231,12 @@ def plot_predictions(y, y_pred, intervals, coverage, cond_coverage, ax=None): color="r", alpha=0.3, linestyle="None", - label="Outside prediction interval" + label="Outside prediction interval", ) ax.scatter(y, y_pred, s=3, color="black") ax.plot([0, max(max(y), max(y_pred))], [0, max(max(y), max(y_pred))], "-r") - ax.set_title( - f"{strategy} - coverage={coverage:.0%} " + - f"- max violation={cond_coverage:.0%}" - ) + ax.set_title(f"{strategy} - coverage={coverage:.0%} " + f"- max violation={cond_coverage:.0%}") ax.set_xlabel("y true") ax.set_ylabel("y pred") ax.legend() @@ -276,7 +251,7 @@ def plot_predictions(y, y_pred, intervals, coverage, cond_coverage, ax=None): intervals[strategy], coverage[strategy][0], cond_coverage[strategy][0], - ax=ax + ax=ax, ) fig.suptitle(f"Predicted values and intervals of level {alpha}") diff --git a/examples/regression/4-tutorials/plot_ts-tutorial.py b/examples/regression/4-tutorials/plot_ts-tutorial.py index d34e95ec..c46be619 100644 --- a/examples/regression/4-tutorials/plot_ts-tutorial.py +++ b/examples/regression/4-tutorials/plot_ts-tutorial.py @@ -51,8 +51,7 @@ class that block bootstraps the training set. from sklearn.ensemble import RandomForestRegressor from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit -from mapie.metrics import (regression_coverage_score, - regression_mean_width_score) +from mapie.metrics import regression_coverage_score, regression_mean_width_score from mapie.subsample import BlockBootstrap from mapie.regression import MapieTimeSeriesRegressor @@ -74,9 +73,7 @@ class that block bootstraps the training set. "https://raw.githubusercontent.com/scikit-learn-contrib/MAPIE/master/" "examples/data/demand_temperature.csv" ) -demand_df = pd.read_csv( - url_file, parse_dates=True, index_col=0 -) +demand_df = pd.read_csv(url_file, parse_dates=True, index_col=0) demand_df["Date"] = pd.to_datetime(demand_df.index) demand_df["Weekofyear"] = demand_df.Date.dt.isocalendar().week.astype("int64") demand_df["Weekday"] = demand_df.Date.dt.isocalendar().day.astype("int64") @@ -91,7 +88,7 @@ class that block bootstraps the training set. # It aims at simulating an effect, such as blackout or lockdown due to a # pandemic, that was not taken into account by the model during its training. -demand_df.Demand.iloc[-int(num_test_steps/2):] -= 2 +demand_df.Demand.iloc[-int(num_test_steps / 2) :] -= 2 ############################################################################## # The last week of the dataset is considered as test set, the remaining data @@ -102,9 +99,7 @@ class that block bootstraps the training set. 
features = ["Weekofyear", "Weekday", "Hour", "Temperature"] features += [f"Lag_{hour}" for hour in range(1, n_lags)] -X_train = demand_train.loc[ - ~np.any(demand_train[features].isnull(), axis=1), features -] +X_train = demand_train.loc[~np.any(demand_train[features].isnull(), axis=1), features] y_train = demand_train.loc[X_train.index, "Demand"] X_test = demand_test.loc[:, features] y_test = demand_test["Demand"] @@ -152,9 +147,7 @@ class that block bootstraps the training set. model = cv_obj.best_estimator_ else: # Model: Random Forest previously optimized with a cross-validation - model = RandomForestRegressor( - max_depth=10, n_estimators=50, random_state=59 - ) + model = RandomForestRegressor(max_depth=10, n_estimators=50, random_state=59) ############################################################################## # 3. Estimate prediction intervals on the test set @@ -184,9 +177,7 @@ class that block bootstraps the training set. alpha = 0.05 gap = 1 -cv_mapiets = BlockBootstrap( - n_resamplings=10, n_blocks=10, overlapping=False, random_state=59 -) +cv_mapiets = BlockBootstrap(n_resamplings=10, n_blocks=10, overlapping=False, random_state=59) mapie_enbpi = MapieTimeSeriesRegressor( model, method="enbpi", cv=cv_mapiets, agg_function="mean", n_jobs=-1 ) @@ -198,12 +189,8 @@ class that block bootstraps the training set. y_pred_npfit, y_pis_npfit = mapie_enbpi.predict( X_test, alpha=alpha, ensemble=True, optimize_beta=True ) -coverage_npfit = regression_coverage_score( - y_test, y_pis_npfit[:, 0, 0], y_pis_npfit[:, 1, 0] -) -width_npfit = regression_mean_width_score( - y_pis_npfit[:, 0, 0], y_pis_npfit[:, 1, 0] -) +coverage_npfit = regression_coverage_score(y_test, y_pis_npfit[:, 0, 0], y_pis_npfit[:, 1, 0]) +width_npfit = regression_mean_width_score(y_pis_npfit[:, 0, 0], y_pis_npfit[:, 1, 0]) ############################################################################## @@ -223,27 +210,17 @@ class that block bootstraps the training set. ) for step in range(gap, len(X_test), gap): mapie_enbpi.partial_fit( - X_test.iloc[(step - gap):step, :], - y_test.iloc[(step - gap):step], + X_test.iloc[(step - gap) : step, :], + y_test.iloc[(step - gap) : step], ) - ( - y_pred_pfit[step:step + gap], - y_pis_pfit[step:step + gap, :, :], - ) = mapie_enbpi.predict( - X_test.iloc[step:(step + gap), :], - alpha=alpha, - ensemble=True, - optimize_beta=True + (y_pred_pfit[step : step + gap], y_pis_pfit[step : step + gap, :, :],) = mapie_enbpi.predict( + X_test.iloc[step : (step + gap), :], alpha=alpha, ensemble=True, optimize_beta=True ) conformity_scores_pfit.append(mapie_enbpi.conformity_scores_) lower_quantiles_pfit.append(mapie_enbpi.lower_quantiles_) higher_quantiles_pfit.append(mapie_enbpi.higher_quantiles_) -coverage_pfit = regression_coverage_score( - y_test, y_pis_pfit[:, 0, 0], y_pis_pfit[:, 1, 0] -) -width_pfit = regression_mean_width_score( - y_pis_pfit[:, 0, 0], y_pis_pfit[:, 1, 0] -) +coverage_pfit = regression_coverage_score(y_test, y_pis_pfit[:, 0, 0], y_pis_pfit[:, 1, 0]) +width_pfit = regression_mean_width_score(y_pis_pfit[:, 0, 0], y_pis_pfit[:, 1, 0]) ############################################################################## # 4. Plot estimated prediction intervals on one-step ahead forecast @@ -257,21 +234,13 @@ class that block bootstraps the training set. 
coverages = [coverage_npfit, coverage_pfit] widths = [width_npfit, width_pfit] -fig, axs = plt.subplots( - nrows=2, ncols=1, figsize=(14, 8), sharey="row", sharex="col" -) +fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(14, 8), sharey="row", sharex="col") for i, (ax, w) in enumerate(zip(axs, ["without", "with"])): ax.set_ylabel("Hourly demand (GW)") - ax.plot( - y_train[int(-len(y_test)/2):], - lw=2, - label="Training data", c="C0" - ) + ax.plot(y_train[int(-len(y_test) / 2) :], lw=2, label="Training data", c="C0") ax.plot(y_test, lw=2, label="Test data", c="C1") - ax.plot( - y_test.index, y_preds[i], lw=2, c="C2", label="Predictions" - ) + ax.plot(y_test.index, y_preds[i], lw=2, c="C2", label="Predictions") ax.fill_between( y_test.index, y_pis[i][:, 0, 0], @@ -297,29 +266,23 @@ class that block bootstraps the training set. for i in range(window, len(y_test), 1): rolling_coverage_pfit.append( regression_coverage_score( - y_test[i-window:i], y_pis_pfit[i-window:i, 0, 0], - y_pis_pfit[i-window:i, 1, 0] + y_test[i - window : i], + y_pis_pfit[i - window : i, 0, 0], + y_pis_pfit[i - window : i, 1, 0], ) ) rolling_coverage_npfit.append( regression_coverage_score( - y_test[i-window:i], y_pis_npfit[i-window:i, 0, 0], - y_pis_npfit[i-window:i, 1, 0] + y_test[i - window : i], + y_pis_npfit[i - window : i, 0, 0], + y_pis_npfit[i - window : i, 1, 0], ) ) plt.figure(figsize=(10, 5)) plt.ylabel(f"Rolling coverage [{window} hours]") -plt.plot( - y_test[window:].index, - rolling_coverage_npfit, - label="Without update of residuals" -) -plt.plot( - y_test[window:].index, - rolling_coverage_pfit, - label="With update of residuals" -) +plt.plot(y_test[window:].index, rolling_coverage_npfit, label="Without update of residuals") +plt.plot(y_test[window:].index, rolling_coverage_pfit, label="With update of residuals") plt.show() ############################################################################## diff --git a/examples/regression/README.rst b/examples/regression/README.rst index a4300404..161b6aa8 100644 --- a/examples/regression/README.rst +++ b/examples/regression/README.rst @@ -1,4 +1,4 @@ .. _regression_examples: Regression examples -=================== \ No newline at end of file +=================== diff --git a/mapie/__init__.py b/mapie/__init__.py index 380383af..5d20408f 100644 --- a/mapie/__init__.py +++ b/mapie/__init__.py @@ -1,9 +1,4 @@ from . 
import classification, metrics, regression from ._version import __version__ -__all__ = [ - "regression", - "classification", - "metrics", - "__version__" -] +__all__ = ["regression", "classification", "metrics", "__version__"] diff --git a/mapie/_compatibility.py b/mapie/_compatibility.py index 5ead5c20..fc1679ac 100644 --- a/mapie/_compatibility.py +++ b/mapie/_compatibility.py @@ -7,30 +7,21 @@ def np_quantile_version_below_122( - a: ArrayLike, - q: ArrayLike, - method: str = "linear", - **kwargs: Any + a: ArrayLike, q: ArrayLike, method: str = "linear", **kwargs: Any ) -> NDArray: """Wrapper of np.quantile function for numpy version < 1.22.""" return np.quantile(a, q, interpolation=method, **kwargs) # type: ignore def np_quantile_version_above_122( - a: ArrayLike, - q: ArrayLike, - method: str = "linear", - **kwargs: Any + a: ArrayLike, q: ArrayLike, method: str = "linear", **kwargs: Any ) -> NDArray: """Wrapper of np.quantile function for numpy version >= 1.22.""" return np.quantile(a, q, method=method, **kwargs) # type: ignore def np_nanquantile_version_below_122( - a: ArrayLike, - q: ArrayLike, - method: str = "linear", - **kwargs: Any + a: ArrayLike, q: ArrayLike, method: str = "linear", **kwargs: Any ) -> NDArray: """Wrapper of np.quantile function for numpy version < 1.22.""" # Does not work if `a` is of dtype object. Converting `a` to a float array @@ -39,10 +30,7 @@ def np_nanquantile_version_below_122( def np_nanquantile_version_above_122( - a: ArrayLike, - q: ArrayLike, - method: str = "linear", - **kwargs: Any + a: ArrayLike, q: ArrayLike, method: str = "linear", **kwargs: Any ) -> NDArray: """Wrapper of np.quantile function for numpy version >= 1.22.""" return np.nanquantile(a, q, method=method, **kwargs) # type: ignore diff --git a/mapie/calibration.py b/mapie/calibration.py index 42d409af..8d8acaff 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -9,13 +9,17 @@ from sklearn.isotonic import IsotonicRegression from sklearn.utils import check_random_state from sklearn.utils.multiclass import type_of_target -from sklearn.utils.validation import (_check_y, _num_samples, check_is_fitted, - indexable) +from sklearn.utils.validation import _check_y, _num_samples, check_is_fitted, indexable from ._typing import ArrayLike, NDArray -from .utils import (check_estimator_classification, - check_estimator_fit_predict, check_n_features_in, - check_null_weight, fit_estimator, get_calib_set) +from .utils import ( + check_estimator_classification, + check_estimator_fit_predict, + check_n_features_in, + check_null_weight, + fit_estimator, + get_calib_set, +) class MapieCalibrator(BaseEstimator, ClassifierMixin): @@ -115,7 +119,7 @@ class MapieCalibrator(BaseEstimator, ClassifierMixin): named_calibrators = { "sigmoid": _SigmoidCalibration(), - "isotonic": IsotonicRegression(out_of_bounds="clip") + "isotonic": IsotonicRegression(out_of_bounds="clip"), } valid_methods = ["top_label"] @@ -161,10 +165,7 @@ def _check_cv( """ if cv in self.valid_cv: return cv - raise ValueError( - "Invalid cv argument. " - f"Allowed values are {self.valid_cv}." - ) + raise ValueError("Invalid cv argument. " f"Allowed values are {self.valid_cv}.") def _check_calibrator( self, @@ -201,15 +202,13 @@ def _check_calibrator( else: raise ValueError( "Please provide a string in: " - + (", ").join(self.named_calibrators.keys()) + "." + + (", ").join(self.named_calibrators.keys()) + + "." 
) check_estimator_fit_predict(calibrator) return calibrator - def _get_labels( - self, - X: ArrayLike - ) -> Tuple[NDArray, NDArray]: + def _get_labels(self, X: ArrayLike) -> Tuple[NDArray, NDArray]: """ This method depends on the value of ``method`` and collects the labels that are needed to transform a multi-class calibration to multiple @@ -250,8 +249,7 @@ def _check_method(self) -> None: """ if self.method not in self.valid_methods: raise ValueError( - "Invalid method, allowed method are: " - + (", ").join(self.valid_methods) + "." + "Invalid method, allowed method are: " + (", ").join(self.valid_methods) + "." ) def _check_type_of_target(self, y: ArrayLike): @@ -266,7 +264,8 @@ def _check_type_of_target(self, y: ArrayLike): if type_of_target(y) not in self.valid_inputs: raise ValueError( "Make sure to have one of the allowed targets: " - + (", ").join(self.valid_inputs) + "." + + (", ").join(self.valid_inputs) + + "." ) def _fit_calibrator( @@ -312,18 +311,12 @@ def _fit_calibrator( if sample_weight is not None: sample_weight_ = sample_weight[given_label_indices] - ( + (sample_weight_, top_class_prob_, y_calib_) = check_null_weight( sample_weight_, top_class_prob_, y_calib_ - ) = check_null_weight( - sample_weight_, - top_class_prob_, - y_calib_ ) else: sample_weight_ = sample_weight - calibrator_ = fit_estimator( - calibrator_, top_class_prob_, y_calib_, sample_weight_ - ) + calibrator_ = fit_estimator(calibrator_, top_class_prob_, y_calib_, sample_weight_) return calibrator_ def _fit_calibrators( @@ -409,9 +402,7 @@ def _pred_proba_calib( """ idx_labels = np.where(y_pred.ravel() == label)[0].ravel() if label not in self.calibrators.keys(): - calibrated_values[ - idx_labels, idx - ] = max_prob[idx_labels].ravel() + calibrated_values[idx_labels, idx] = max_prob[idx_labels].ravel() warnings.warn( f"WARNING: This predicted label {label} has not been seen " + " during the calibration and therefore scores will remain" @@ -487,9 +478,7 @@ def fit( self.single_estimator_ = estimator self.classes_ = self.single_estimator_.classes_ self.n_classes_ = len(self.classes_) - self.calibrators = self._fit_calibrators( - X, y, sample_weight, calibrator - ) + self.calibrators = self._fit_calibrators(X, y, sample_weight, calibrator) if cv == "split": results = get_calib_set( X, @@ -503,20 +492,17 @@ def fit( X_train, y_train, X_calib, y_calib, sw_train, sw_calib = results X_train, y_train = indexable(X_train, y_train) y_train = _check_y(y_train) - sw_train, X_train, y_train = check_null_weight( - sw_train, - X_train, - y_train - ) + sw_train, X_train, y_train = check_null_weight(sw_train, X_train, y_train) estimator = fit_estimator( - clone(estimator), X_train, y_train, sw_train, + clone(estimator), + X_train, + y_train, + sw_train, ) self.single_estimator_ = estimator self.classes_ = self.single_estimator_.classes_ self.n_classes_ = len(self.classes_) - self.calibrators = self._fit_calibrators( - X_calib, y_calib, sw_calib, calibrator - ) + self.calibrators = self._fit_calibrators(X_calib, y_calib, sw_calib, calibrator) return self def predict_proba( diff --git a/mapie/classification.py b/mapie/classification.py index 05a73cf4..8a6fad3b 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -9,18 +9,25 @@ from sklearn.model_selection import BaseCrossValidator, ShuffleSplit from sklearn.preprocessing import LabelEncoder, label_binarize from sklearn.utils import _safe_indexing, check_random_state -from sklearn.utils.multiclass import (check_classification_targets, - type_of_target) -from 
sklearn.utils.validation import (_check_y, _num_samples, check_is_fitted, - indexable) +from sklearn.utils.multiclass import check_classification_targets, type_of_target +from sklearn.utils.validation import _check_y, _num_samples, check_is_fitted, indexable from ._machine_precision import EPSILON from ._typing import ArrayLike, NDArray from .metrics import classification_mean_width_score -from .utils import (check_alpha, check_alpha_and_n_samples, check_cv, - check_estimator_classification, check_n_features_in, - check_n_jobs, check_null_weight, check_verbose, - compute_quantiles, fit_estimator, fix_number_of_classes) +from .utils import ( + check_alpha, + check_alpha_and_n_samples, + check_cv, + check_estimator_classification, + check_n_features_in, + check_n_jobs, + check_null_weight, + check_verbose, + compute_quantiles, + fit_estimator, + fix_number_of_classes, +) class MapieClassifier(BaseEstimator, ClassifierMixin): @@ -186,9 +193,7 @@ class MapieClassifier(BaseEstimator, ClassifierMixin): """ raps_valid_cv_ = ["prefit", "split"] - valid_methods_ = [ - "naive", "score", "lac", "cumulated_score", "aps", "top_k", "raps" - ] + valid_methods_ = ["naive", "score", "lac", "cumulated_score", "aps", "top_k", "raps"] fit_attributes = [ "single_estimator_", "estimators_", @@ -196,7 +201,7 @@ class MapieClassifier(BaseEstimator, ClassifierMixin): "n_features_in_", "conformity_scores_", "classes_", - "label_encoder_" + "label_encoder_", ] def __init__( @@ -207,7 +212,7 @@ def __init__( test_size: Optional[Union[int, float]] = None, n_jobs: Optional[int] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, - verbose: int = 0 + verbose: int = 0, ) -> None: self.estimator = estimator self.method = method @@ -227,10 +232,7 @@ def _check_parameters(self) -> None: If parameters are not valid. """ if self.method not in self.valid_methods_: - raise ValueError( - "Invalid method. " - f"Allowed values are {self.valid_methods_}." - ) + raise ValueError("Invalid method. " f"Allowed values are {self.valid_methods_}.") check_n_jobs(self.n_jobs) check_verbose(self.verbose) check_random_state(self.random_state) @@ -250,18 +252,18 @@ def _check_depreciated(self) -> None: if self.method == "score": warnings.warn( "WARNING: Deprecated method. " - + "The method \"score\" is outdated. " - + "Prefer to use \"lac\" instead to keep " + + 'The method "score" is outdated. ' + + 'Prefer to use "lac" instead to keep ' + "the same behavior in the next release.", - DeprecationWarning + DeprecationWarning, ) if self.method == "cumulated_score": warnings.warn( "WARNING: Deprecated method. " - + "The method \"cumulated_score\" is outdated. " - + "Prefer to use \"aps\" instead to keep " + + 'The method "cumulated_score" is outdated. ' + + 'Prefer to use "aps" instead to keep ' + "the same behavior in the next release.", - DeprecationWarning + DeprecationWarning, ) def _check_target(self, y: ArrayLike) -> None: @@ -281,8 +283,7 @@ def _check_target(self, y: ArrayLike) -> None: or ``"score"`` or if type of target is not multi-class. """ check_classification_targets(y) - if type_of_target(y) == "binary" and \ - self.method not in ["score", "lac"]: + if type_of_target(y) == "binary" and self.method not in ["score", "lac"]: raise ValueError( "Invalid method for binary target. " "Your target is not of type multiclass and " @@ -301,17 +302,12 @@ def _check_raps(self): If ``method`` is ``"raps"`` and ``cv`` is not ``"prefit"``. 
""" if (self.method == "raps") and ( - (self.cv not in self.raps_valid_cv_) - or isinstance(self.cv, ShuffleSplit) + (self.cv not in self.raps_valid_cv_) or isinstance(self.cv, ShuffleSplit) ): - raise ValueError( - "RAPS method can only be used " - f"with cv in {self.raps_valid_cv_}." - ) + raise ValueError("RAPS method can only be used " f"with cv in {self.raps_valid_cv_}.") def _check_include_last_label( - self, - include_last_label: Optional[Union[bool, str]] + self, include_last_label: Optional[Union[bool, str]] ) -> Optional[Union[bool, str]]: """ Check if ``include_last_label`` is a boolean or a string. @@ -342,22 +338,14 @@ def _check_include_last_label( "Invalid include_last_label argument. " "Should be a boolean or 'randomized'." """ - if ( - (not isinstance(include_last_label, bool)) and - (not include_last_label == "randomized") - ): + if (not isinstance(include_last_label, bool)) and (not include_last_label == "randomized"): raise ValueError( - "Invalid include_last_label argument. " - "Should be a boolean or 'randomized'." + "Invalid include_last_label argument. " "Should be a boolean or 'randomized'." ) else: return include_last_label - def _check_proba_normalized( - self, - y_pred_proba: ArrayLike, - axis: int = 1 - ) -> NDArray: + def _check_proba_normalized(self, y_pred_proba: ArrayLike, axis: int = 1) -> NDArray: """ Check if, for all the observations, the sum of the probabilities is equal to one. @@ -384,7 +372,7 @@ def _check_proba_normalized( np.sum(y_pred_proba, axis=axis), 1, err_msg="The sum of the scores is not equal to one.", - rtol=1e-5 + rtol=1e-5, ) y_pred_proba = cast(NDArray, y_pred_proba).astype(np.float64) return y_pred_proba @@ -393,7 +381,7 @@ def _get_last_index_included( self, y_pred_proba_cumsum: NDArray, threshold: NDArray, - include_last_label: Optional[Union[bool, str]] + include_last_label: Optional[Union[bool, str]], ) -> NDArray: """ Return the index of the last included sorted probability @@ -424,32 +412,23 @@ def _get_last_index_included( NDArray of shape (n_samples, n_alpha) Index of the last included sorted probability. """ - if ( - (include_last_label) or - (include_last_label == 'randomized') - ): - y_pred_index_last = ( - np.ma.masked_less( - y_pred_proba_cumsum - - threshold[np.newaxis, :], - -EPSILON - ).argmin(axis=1) - ) - elif (include_last_label is False): + if (include_last_label) or (include_last_label == "randomized"): + y_pred_index_last = np.ma.masked_less( + y_pred_proba_cumsum - threshold[np.newaxis, :], -EPSILON + ).argmin(axis=1) + elif include_last_label is False: max_threshold = np.maximum( - threshold[np.newaxis, :], - np.min(y_pred_proba_cumsum, axis=1) + threshold[np.newaxis, :], np.min(y_pred_proba_cumsum, axis=1) ) y_pred_index_last = np.argmax( np.ma.masked_greater( - y_pred_proba_cumsum - max_threshold[:, np.newaxis, :], - EPSILON - ), axis=1 + y_pred_proba_cumsum - max_threshold[:, np.newaxis, :], EPSILON + ), + axis=1, ) else: raise ValueError( - "Invalid include_last_label argument. " - "Should be a boolean or 'randomized'." + "Invalid include_last_label argument. " "Should be a boolean or 'randomized'." 
) return y_pred_index_last[:, np.newaxis, :] @@ -461,7 +440,7 @@ def _add_random_tie_breaking( y_pred_proba_last: NDArray, threshold: NDArray, lambda_star: Union[NDArray, float, None], - k_star: Union[NDArray, None] + k_star: Union[NDArray, None], ) -> NDArray: """ Randomly remove last label from prediction set based on the @@ -507,29 +486,19 @@ def _add_random_tie_breaking( """ # get cumsumed probabilities up to last retained label y_proba_last_cumsumed = np.squeeze( - np.take_along_axis( - y_pred_proba_cumsum, - y_pred_index_last, - axis=1 - ), axis=1 + np.take_along_axis(y_pred_proba_cumsum, y_pred_index_last, axis=1), axis=1 ) if self.method in ["cumulated_score", "aps"]: # compute V parameter from Romano+(2020) - vs = ( - (y_proba_last_cumsumed - threshold.reshape(1, -1)) / - y_pred_proba_last[:, 0, :] - ) + vs = (y_proba_last_cumsumed - threshold.reshape(1, -1)) / y_pred_proba_last[:, 0, :] else: # compute V parameter from Angelopoulos+(2020) L = np.sum(prediction_sets, axis=1) - vs = ( - (y_proba_last_cumsumed - threshold.reshape(1, -1)) / - ( - y_pred_proba_last[:, 0, :] - - lambda_star * np.maximum(0, L - k_star) + - lambda_star * (L > k_star) - ) + vs = (y_proba_last_cumsumed - threshold.reshape(1, -1)) / ( + y_pred_proba_last[:, 0, :] + - lambda_star * np.maximum(0, L - k_star) + + lambda_star * (L > k_star) ) # get random numbers for each observation and alpha value @@ -538,10 +507,7 @@ def _add_random_tie_breaking( # remove last label from comparison between uniform number and V vs_less_than_us = np.less_equal(vs - us, EPSILON) np.put_along_axis( - prediction_sets, - y_pred_index_last, - vs_less_than_us[:, np.newaxis, :], - axis=1 + prediction_sets, y_pred_index_last, vs_less_than_us[:, np.newaxis, :], axis=1 ) return prediction_sets @@ -569,11 +535,7 @@ def _predict_oof_model( y_pred_proba = estimator.predict_proba(X) # we enforce y_pred_proba to contain all labels included in y if len(estimator.classes_) != self.n_classes_: - y_pred_proba = fix_number_of_classes( - self.n_classes_, - estimator.classes_, - y_pred_proba - ) + y_pred_proba = fix_number_of_classes(self.n_classes_, estimator.classes_, y_pred_proba) y_pred_proba = self._check_proba_normalized(y_pred_proba) return y_pred_proba @@ -636,9 +598,7 @@ def _fit_and_predict_oof_model( estimator = fit_estimator(estimator, X_train, y_train) else: sample_weight_train = _safe_indexing(sample_weight, train_index) - estimator = fit_estimator( - estimator, X_train, y_train, sample_weight_train - ) + estimator = fit_estimator(estimator, X_train, y_train, sample_weight_train) if _num_samples(X_val) > 0: y_pred_proba = self._predict_oof_model(estimator, X_val) else: @@ -647,9 +607,7 @@ def _fit_and_predict_oof_model( return estimator, y_pred_proba, val_id, val_index def _get_true_label_cumsum_proba( - self, - y: ArrayLike, - y_pred_proba: NDArray + self, y: ArrayLike, y_pred_proba: NDArray ) -> Tuple[NDArray, NDArray]: """ Compute the cumsumed probability of the true label. @@ -668,13 +626,9 @@ def _get_true_label_cumsum_proba( is the cumsum probability of the true label. The second is the sorted position of the true label. 
""" - y_true = label_binarize( - y=y, classes=self.classes_ - ) + y_true = label_binarize(y=y, classes=self.classes_) index_sorted = np.fliplr(np.argsort(y_pred_proba, axis=1)) - y_pred_proba_sorted = np.take_along_axis( - y_pred_proba, index_sorted, axis=1 - ) + y_pred_proba_sorted = np.take_along_axis(y_pred_proba, index_sorted, axis=1) y_true_sorted = np.take_along_axis(y_true, index_sorted, axis=1) y_pred_proba_sorted_cumsum = np.cumsum(y_pred_proba_sorted, axis=1) cutoff = np.argmax(y_true_sorted, axis=1) @@ -685,11 +639,7 @@ def _get_true_label_cumsum_proba( return true_label_cumsum_proba, cutoff + 1 def _regularize_conformity_score( - self, - k_star: NDArray, - lambda_: Union[NDArray, float], - conf_score: NDArray, - cutoff: NDArray + self, k_star: NDArray, lambda_: Union[NDArray, float], conf_score: NDArray, cutoff: NDArray ) -> NDArray: """ Regularize the conformity scores with the ``"raps"`` @@ -716,26 +666,12 @@ def _regularize_conformity_score( Regularized conformity scores. The regularization depends on the value of alpha. """ - conf_score = np.repeat( - conf_score[:, :, np.newaxis], len(k_star), axis=2 - ) - cutoff = np.repeat( - cutoff[:, np.newaxis], len(k_star), axis=1 - ) - conf_score += np.maximum( - np.expand_dims( - lambda_ * (cutoff - k_star), - axis=1 - ), - 0 - ) + conf_score = np.repeat(conf_score[:, :, np.newaxis], len(k_star), axis=2) + cutoff = np.repeat(cutoff[:, np.newaxis], len(k_star), axis=1) + conf_score += np.maximum(np.expand_dims(lambda_ * (cutoff - k_star), axis=1), 0) return conf_score - def _get_true_label_position( - self, - y_pred_proba: NDArray, - y: NDArray - ) -> NDArray: + def _get_true_label_position(self, y_pred_proba: NDArray, y: NDArray) -> NDArray: """ Return the sorted position of the true label in the prediction @@ -753,14 +689,8 @@ def _get_true_label_position( NDArray of shape (n_samples, 1) Position of the true label in the prediction. """ - index = np.argsort( - np.fliplr(np.argsort(y_pred_proba, axis=1)) - ) - position = np.take_along_axis( - index, - y.reshape(-1, 1), - axis=1 - ) + index = np.argsort(np.fliplr(np.argsort(y_pred_proba, axis=1))) + position = np.take_along_axis(index, y.reshape(-1, 1), axis=1) return position @@ -770,7 +700,7 @@ def _get_last_included_proba( thresholds: NDArray, include_last_label: Union[bool, str, None], lambda_: Union[NDArray, float, None], - k_star: Union[NDArray, Any] + k_star: Union[NDArray, Any], ) -> Tuple[NDArray, NDArray, NDArray]: """ Function that returns the smallest score @@ -805,59 +735,36 @@ def _get_last_included_proba( with the RAPS method, the index of the last included score and the value of the last included score. 
""" - index_sorted = np.flip( - np.argsort(y_pred_proba, axis=1), axis=1 - ) + index_sorted = np.flip(np.argsort(y_pred_proba, axis=1), axis=1) # sort probabilities by decreasing order - y_pred_proba_sorted = np.take_along_axis( - y_pred_proba, index_sorted, axis=1 - ) + y_pred_proba_sorted = np.take_along_axis(y_pred_proba, index_sorted, axis=1) # get sorted cumulated score - y_pred_proba_sorted_cumsum = np.cumsum( - y_pred_proba_sorted, axis=1 - ) + y_pred_proba_sorted_cumsum = np.cumsum(y_pred_proba_sorted, axis=1) if self.method == "raps": y_pred_proba_sorted_cumsum += lambda_ * np.maximum( - 0, - np.cumsum( - np.ones(y_pred_proba_sorted_cumsum.shape), - axis=1 - ) - k_star + 0, np.cumsum(np.ones(y_pred_proba_sorted_cumsum.shape), axis=1) - k_star ) # get cumulated score at their original position y_pred_proba_cumsum = np.take_along_axis( - y_pred_proba_sorted_cumsum, - np.argsort(index_sorted, axis=1), - axis=1 + y_pred_proba_sorted_cumsum, np.argsort(index_sorted, axis=1), axis=1 ) # get index of the last included label y_pred_index_last = self._get_last_index_included( - y_pred_proba_cumsum, - thresholds, - include_last_label + y_pred_proba_cumsum, thresholds, include_last_label ) # get the probability of the last included label - y_pred_proba_last = np.take_along_axis( - y_pred_proba, - y_pred_index_last, - axis=1 - ) + y_pred_proba_last = np.take_along_axis(y_pred_proba, y_pred_index_last, axis=1) - zeros_scores_proba_last = (y_pred_proba_last <= EPSILON) + zeros_scores_proba_last = y_pred_proba_last <= EPSILON # If the last included proba is zero, change it to the # smallest non-zero value to avoid inluding them in the # prediction sets. if np.sum(zeros_scores_proba_last) > 0: y_pred_proba_last[zeros_scores_proba_last] = np.expand_dims( - np.min( - np.ma.masked_less( - y_pred_proba, - EPSILON - ).filled(fill_value=np.inf), - axis=1 - ), axis=1 + np.min(np.ma.masked_less(y_pred_proba, EPSILON).filled(fill_value=np.inf), axis=1), + axis=1, )[zeros_scores_proba_last] return y_pred_proba_cumsum, y_pred_index_last, y_pred_proba_last @@ -868,7 +775,7 @@ def _update_size_and_lambda( alpha_np: NDArray, y_ps: NDArray, lambda_: Union[NDArray, float], - lambda_star: NDArray + lambda_star: NDArray, ) -> Tuple[NDArray, NDArray]: """Update the values of the optimal lambda if the average size of the prediction sets decreases with @@ -901,16 +808,10 @@ def _update_size_and_lambda( and the new best sizes. """ - sizes = [ - classification_mean_width_score( - y_ps[:, :, i] - ) for i in range(len(alpha_np)) - ] + sizes = [classification_mean_width_score(y_ps[:, :, i]) for i in range(len(alpha_np))] - sizes_improve = (sizes < best_sizes - EPSILON) - lambda_star = ( - sizes_improve * lambda_ + (1 - sizes_improve) * lambda_star - ) + sizes_improve = sizes < best_sizes - EPSILON + lambda_star = sizes_improve * lambda_ + (1 - sizes_improve) * lambda_star best_sizes = sizes_improve * sizes + (1 - sizes_improve) * best_sizes return lambda_star, best_sizes @@ -920,7 +821,7 @@ def _find_lambda_star( y_pred_proba_raps: NDArray, alpha_np: NDArray, include_last_label: Union[bool, str, None], - k_star: NDArray + k_star: NDArray, ) -> Union[NDArray, float]: """Find the optimal value of lambda for each alpha. 
@@ -948,37 +849,23 @@ def _find_lambda_star( lambda_star = np.zeros(len(alpha_np)) best_sizes = np.full(len(alpha_np), np.finfo(np.float64).max) - for lambda_ in [.001, .01, .1, .2, .5]: # values given in paper[3] - true_label_cumsum_proba, cutoff = ( - self._get_true_label_cumsum_proba( - self.y_raps_no_enc, - y_pred_proba_raps[:, :, 0], - ) + for lambda_ in [0.001, 0.01, 0.1, 0.2, 0.5]: # values given in paper[3] + true_label_cumsum_proba, cutoff = self._get_true_label_cumsum_proba( + self.y_raps_no_enc, + y_pred_proba_raps[:, :, 0], ) true_label_cumsum_proba_reg = self._regularize_conformity_score( - k_star, - lambda_, - true_label_cumsum_proba, - cutoff + k_star, lambda_, true_label_cumsum_proba, cutoff ) - quantiles_ = compute_quantiles( - true_label_cumsum_proba_reg, - alpha_np - ) + quantiles_ = compute_quantiles(true_label_cumsum_proba_reg, alpha_np) _, _, y_pred_proba_last = self._get_last_included_proba( - y_pred_proba_raps, - quantiles_, - include_last_label, - lambda_, - k_star + y_pred_proba_raps, quantiles_, include_last_label, lambda_, k_star ) - y_ps = np.greater_equal( - y_pred_proba_raps - y_pred_proba_last, -EPSILON - ) + y_ps = np.greater_equal(y_pred_proba_raps - y_pred_proba_last, -EPSILON) lambda_star, best_sizes = self._update_size_and_lambda( best_sizes, alpha_np, y_ps, lambda_, lambda_star ) @@ -986,9 +873,7 @@ def _find_lambda_star( lambda_star = lambda_star[0] return lambda_star - def _get_classes_info( - self, estimator: ClassifierMixin, y: NDArray - ) -> Tuple[int, NDArray]: + def _get_classes_info(self, estimator: ClassifierMixin, y: NDArray) -> Tuple[int, NDArray]: """ Compute the number of classes and the classes values according to either the pre-trained model or to the @@ -1046,7 +931,7 @@ def fit( X: ArrayLike, y: ArrayLike, sample_weight: Optional[ArrayLike] = None, - size_raps: Optional[float] = .2, + size_raps: Optional[float] = 0.2, ) -> MapieClassifier: """ Fit the base estimator or use the fitted base estimator. 
@@ -1082,9 +967,7 @@ def fit( """ # Checks self._check_parameters() - cv = check_cv( - self.cv, test_size=self.test_size, random_state=self.random_state - ) + cv = check_cv(self.cv, test_size=self.test_size, random_state=self.random_state) X, y = indexable(X, y) y = _check_y(y) @@ -1093,19 +976,12 @@ def fit( y = cast(NDArray, y) - estimator = check_estimator_classification( - X, - y, - cv, - self.estimator - ) + estimator = check_estimator_classification(X, y, cv, self.estimator) self.n_features_in_ = check_n_features_in(X, cv, estimator) n_samples = _num_samples(y) - self.n_classes_, self.classes_ = self._get_classes_info( - estimator, y - ) + self.n_classes_, self.classes_ = self._get_classes_info(estimator, y) enc = LabelEncoder() enc.fit(self.classes_) y_enc = enc.transform(y) @@ -1119,18 +995,15 @@ def fit( self.n_samples_ = _num_samples(X) if self.method == "raps": - raps_split = ShuffleSplit( - 1, test_size=size_raps, random_state=self.random_state - ) + raps_split = ShuffleSplit(1, test_size=size_raps, random_state=self.random_state) train_raps_index, val_raps_index = next(raps_split.split(X)) - X, self.X_raps, y_enc, self.y_raps = \ - _safe_indexing(X, train_raps_index), \ - _safe_indexing(X, val_raps_index), \ - _safe_indexing(y_enc, train_raps_index), \ - _safe_indexing(y_enc, val_raps_index) - self.y_raps_no_enc = self.label_encoder_.inverse_transform( - self.y_raps + X, self.X_raps, y_enc, self.y_raps = ( + _safe_indexing(X, train_raps_index), + _safe_indexing(X, val_raps_index), + _safe_indexing(y_enc, train_raps_index), + _safe_indexing(y_enc, val_raps_index), ) + self.y_raps_no_enc = self.label_encoder_.inverse_transform(self.y_raps) y = self.label_encoder_.inverse_transform(y_enc) y_enc = cast(NDArray, y_enc) n_samples = _num_samples(y_enc) @@ -1146,13 +1019,8 @@ def fit( else: cv = cast(BaseCrossValidator, cv) - self.single_estimator_ = fit_estimator( - clone(estimator), X, y, sample_weight - ) - y_pred_proba = np.empty( - (n_samples, self.n_classes_), - dtype=float - ) + self.single_estimator_ = fit_estimator(clone(estimator), X, y, sample_weight) + y_pred_proba = np.empty((n_samples, self.n_classes_), dtype=float) outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( delayed(self._fit_and_predict_oof_model)( clone(estimator), @@ -1165,19 +1033,12 @@ def fit( ) for k, (train_index, val_index) in enumerate(cv.split(X)) ) - ( - self.estimators_, - predictions_list, - val_ids_list, - val_indices_list - ) = map(list, zip(*outputs)) - predictions = np.concatenate( - cast(List[NDArray], predictions_list) + (self.estimators_, predictions_list, val_ids_list, val_indices_list) = map( + list, zip(*outputs) ) + predictions = np.concatenate(cast(List[NDArray], predictions_list)) val_ids = np.concatenate(cast(List[NDArray], val_ids_list)) - val_indices = np.concatenate( - cast(List[NDArray], val_indices_list) - ) + val_indices = np.concatenate(cast(List[NDArray], val_indices_list)) self.k_[val_indices] = val_ids y_pred_proba[val_indices] = predictions @@ -1191,34 +1052,21 @@ def fit( # RAPS: compute y_pred and position on the RAPS validation dataset if self.method == "raps": - self.y_pred_proba_raps = self.single_estimator_.predict_proba( - self.X_raps - ) - self.position_raps = self._get_true_label_position( - self.y_pred_proba_raps, - self.y_raps - ) + self.y_pred_proba_raps = self.single_estimator_.predict_proba(self.X_raps) + self.position_raps = self._get_true_label_position(self.y_pred_proba_raps, self.y_raps) # Conformity scores if self.method == "naive": - 
self.conformity_scores_ = np.empty( - y_pred_proba.shape, - dtype="float" - ) + self.conformity_scores_ = np.empty(y_pred_proba.shape, dtype="float") elif self.method in ["score", "lac"]: self.conformity_scores_ = np.take_along_axis( 1 - y_pred_proba, y_enc.reshape(-1, 1), axis=1 ) elif self.method in ["cumulated_score", "aps", "raps"]: - self.conformity_scores_, self.cutoff = ( - self._get_true_label_cumsum_proba( - y, - y_pred_proba - ) - ) - y_proba_true = np.take_along_axis( - y_pred_proba, y_enc.reshape(-1, 1), axis=1 + self.conformity_scores_, self.cutoff = self._get_true_label_cumsum_proba( + y, y_pred_proba ) + y_proba_true = np.take_along_axis(y_pred_proba, y_enc.reshape(-1, 1), axis=1) random_state = check_random_state(self.random_state) u = random_state.uniform(size=len(y_pred_proba)).reshape(-1, 1) self.conformity_scores_ -= u * y_proba_true @@ -1226,15 +1074,9 @@ def fit( # Here we reorder the labels by decreasing probability # and get the position of each label from decreasing # probability - self.conformity_scores_ = self._get_true_label_position( - y_pred_proba, - y_enc - ) + self.conformity_scores_ = self._get_true_label_position(y_pred_proba, y_enc) else: - raise ValueError( - "Invalid method. " - f"Allowed values are {self.valid_methods_}." - ) + raise ValueError("Invalid method. " f"Allowed values are {self.valid_methods_}.") if isinstance(cv, ShuffleSplit): self.single_estimator_ = self.estimators_[0] @@ -1246,7 +1088,7 @@ def predict( X: ArrayLike, alpha: Optional[Union[float, Iterable[float]]] = None, include_last_label: Optional[Union[bool, str]] = True, - agg_scores: Optional[str] = "mean" + agg_scores: Optional[str] = "mean", ) -> Union[NDArray, Tuple[NDArray, NDArray]]: """ Prediction prediction sets on new samples based on target confidence @@ -1316,9 +1158,7 @@ def predict( if self.method == "top_k": agg_scores = "mean" # Checks - cv = check_cv( - self.cv, test_size=self.test_size, random_state=self.random_state - ) + cv = check_cv(self.cv, test_size=self.test_size, random_state=self.random_state) include_last_label = self._check_include_last_label(include_last_label) alpha = cast(Optional[NDArray], check_alpha(alpha)) check_is_fitted(self, self.fit_attributes) @@ -1338,14 +1178,10 @@ def predict( check_alpha_and_n_samples(alpha_np, n) if cv == "prefit": y_pred_proba = self.single_estimator_.predict_proba(X) - y_pred_proba = np.repeat( - y_pred_proba[:, :, np.newaxis], len(alpha_np), axis=2 - ) + y_pred_proba = np.repeat(y_pred_proba[:, :, np.newaxis], len(alpha_np), axis=2) else: y_pred_proba_k = np.asarray( - Parallel( - n_jobs=self.n_jobs, verbose=self.verbose - )( + Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( delayed(self._predict_oof_model)(estimator, X) for estimator in self.estimators_ ) @@ -1354,9 +1190,7 @@ def predict( y_pred_proba = np.moveaxis(y_pred_proba_k[self.k_], 0, 2) elif agg_scores == "mean": y_pred_proba = np.mean(y_pred_proba_k, axis=0) - y_pred_proba = np.repeat( - y_pred_proba[:, :, np.newaxis], len(alpha_np), axis=2 - ) + y_pred_proba = np.repeat(y_pred_proba[:, :, np.newaxis], len(alpha_np), axis=2) else: raise ValueError("Invalid 'agg_scores' argument.") # Check that sum of probas is equal to 1 @@ -1371,59 +1205,38 @@ def predict( if (cv == "prefit") or (agg_scores in ["mean"]): if self.method == "raps": check_alpha_and_n_samples(alpha_np, len(self.X_raps)) - k_star = compute_quantiles( - self.position_raps, - alpha_np - ) + 1 + k_star = compute_quantiles(self.position_raps, alpha_np) + 1 y_pred_proba_raps = np.repeat( - 
self.y_pred_proba_raps[:, :, np.newaxis], - len(alpha_np), - axis=2 + self.y_pred_proba_raps[:, :, np.newaxis], len(alpha_np), axis=2 ) lambda_star = self._find_lambda_star( - y_pred_proba_raps, - alpha_np, - include_last_label, - k_star + y_pred_proba_raps, alpha_np, include_last_label, k_star ) - self.conformity_scores_regularized = ( - self._regularize_conformity_score( - k_star, - lambda_star, - self.conformity_scores_, - self.cutoff - ) + self.conformity_scores_regularized = self._regularize_conformity_score( + k_star, lambda_star, self.conformity_scores_, self.cutoff ) self.quantiles_ = compute_quantiles( - self.conformity_scores_regularized, - alpha_np + self.conformity_scores_regularized, alpha_np ) else: - self.quantiles_ = compute_quantiles( - self.conformity_scores_, - alpha_np - ) + self.quantiles_ = compute_quantiles(self.conformity_scores_, alpha_np) else: self.quantiles_ = (n + 1) * (1 - alpha_np) # Build prediction sets if self.method in ["score", "lac"]: if (cv == "prefit") or (agg_scores == "mean"): - prediction_sets = np.greater_equal( - y_pred_proba - (1 - self.quantiles_), -EPSILON - ) + prediction_sets = np.greater_equal(y_pred_proba - (1 - self.quantiles_), -EPSILON) else: y_pred_included = np.less_equal( - (1 - y_pred_proba) - self.conformity_scores_.ravel(), - EPSILON + (1 - y_pred_proba) - self.conformity_scores_.ravel(), EPSILON ).sum(axis=2) prediction_sets = np.stack( [ - np.greater_equal( - y_pred_included - _alpha * (n - 1), -EPSILON - ) + np.greater_equal(y_pred_included - _alpha * (n - 1), -EPSILON) for _alpha in alpha_np - ], axis=2 + ], + axis=2, ) elif self.method in ["naive", "cumulated_score", "aps", "raps"]: @@ -1433,25 +1246,23 @@ def predict( else: thresholds = self.conformity_scores_.ravel() # sort labels by decreasing probability - y_pred_proba_cumsum, y_pred_index_last, y_pred_proba_last = ( - self._get_last_included_proba( - y_pred_proba, - thresholds, - include_last_label, - lambda_star, - k_star, - ) + ( + y_pred_proba_cumsum, + y_pred_index_last, + y_pred_proba_last, + ) = self._get_last_included_proba( + y_pred_proba, + thresholds, + include_last_label, + lambda_star, + k_star, ) # get the prediction set by taking all probabilities # above the last one if (cv == "prefit") or (agg_scores in ["mean"]): - y_pred_included = np.greater_equal( - y_pred_proba - y_pred_proba_last, -EPSILON - ) + y_pred_included = np.greater_equal(y_pred_proba - y_pred_proba_last, -EPSILON) else: - y_pred_included = np.less_equal( - y_pred_proba - y_pred_proba_last, EPSILON - ) + y_pred_included = np.less_equal(y_pred_proba - y_pred_proba_last, EPSILON) # remove last label randomly if include_last_label == "randomized": y_pred_included = self._add_random_tie_breaking( @@ -1461,7 +1272,7 @@ def predict( y_pred_proba_last, thresholds, lambda_star, - k_star + k_star, ) if (cv == "prefit") or (agg_scores in ["mean"]): prediction_sets = y_pred_included @@ -1471,35 +1282,26 @@ def predict( prediction_sets = np.less_equal( prediction_sets_summed[:, :, np.newaxis] - self.quantiles_[np.newaxis, np.newaxis, :], - EPSILON + EPSILON, ) elif self.method == "top_k": y_pred_proba = y_pred_proba[:, :, 0] index_sorted = np.fliplr(np.argsort(y_pred_proba, axis=1)) y_pred_index_last = np.stack( - [ - index_sorted[:, quantile] - for quantile in self.quantiles_ - ], axis=1 + [index_sorted[:, quantile] for quantile in self.quantiles_], axis=1 ) y_pred_proba_last = np.stack( [ np.take_along_axis( - y_pred_proba, - y_pred_index_last[:, iq].reshape(-1, 1), - axis=1 + y_pred_proba, 
y_pred_index_last[:, iq].reshape(-1, 1), axis=1 ) for iq, _ in enumerate(self.quantiles_) - ], axis=2 + ], + axis=2, ) prediction_sets = np.greater_equal( - y_pred_proba[:, :, np.newaxis] - - y_pred_proba_last, - -EPSILON + y_pred_proba[:, :, np.newaxis] - y_pred_proba_last, -EPSILON ) else: - raise ValueError( - "Invalid method. " - f"Allowed values are {self.valid_methods_}." - ) + raise ValueError("Invalid method. " f"Allowed values are {self.valid_methods_}.") return y_pred, prediction_sets diff --git a/mapie/conformity_scores/__init__.py b/mapie/conformity_scores/__init__.py index 0dab4b62..53e3d915 100644 --- a/mapie/conformity_scores/__init__.py +++ b/mapie/conformity_scores/__init__.py @@ -1,11 +1,13 @@ from .conformity_scores import ConformityScore -from .residual_conformity_scores import (AbsoluteConformityScore, - GammaConformityScore, - ResidualNormalisedScore) +from .residual_conformity_scores import ( + AbsoluteConformityScore, + GammaConformityScore, + ResidualNormalisedScore, +) __all__ = [ "ConformityScore", "AbsoluteConformityScore", "GammaConformityScore", - "ResidualNormalisedScore" + "ResidualNormalisedScore", ] diff --git a/mapie/conformity_scores/conformity_scores.py b/mapie/conformity_scores/conformity_scores.py index ef4a79ad..b61d9bd4 100644 --- a/mapie/conformity_scores/conformity_scores.py +++ b/mapie/conformity_scores/conformity_scores.py @@ -86,10 +86,7 @@ def get_signed_conformity_scores( @abstractmethod def get_estimation_distribution( - self, - X: ArrayLike, - y_pred: ArrayLike, - conformity_scores: ArrayLike + self, X: ArrayLike, y_pred: ArrayLike, conformity_scores: ArrayLike ) -> NDArray: """ Placeholder for ``get_estimation_distribution``. @@ -159,9 +156,7 @@ def check_consistency( ValueError If the two methods are not consistent. """ - score_distribution = self.get_estimation_distribution( - X, y_pred, conformity_scores - ) + score_distribution = self.get_estimation_distribution(X, y_pred, conformity_scores) abs_conformity_scores = np.abs(np.subtract(score_distribution, y)) max_conf_score = np.max(abs_conformity_scores) if max_conf_score > self.eps: @@ -211,10 +206,7 @@ def get_conformity_scores( @staticmethod def get_quantile( - conformity_scores: NDArray, - alpha_np: NDArray, - axis: int, - method: str + conformity_scores: NDArray, alpha_np: NDArray, axis: int, method: str ) -> NDArray: """ Compute the alpha quantile of the conformity scores or the conformity @@ -243,15 +235,12 @@ def get_quantile( NDArray of shape (1, n_alpha) or (n_samples, n_alpha) The quantile of the conformity scores. 
""" - quantile = np.column_stack([ - np_nanquantile( - conformity_scores.astype(float), - _alpha, - axis=axis, - method=method - ) - for _alpha in alpha_np - ]) + quantile = np.column_stack( + [ + np_nanquantile(conformity_scores.astype(float), _alpha, axis=axis, method=method) + for _alpha in alpha_np + ] + ) return quantile def get_bounds( @@ -261,7 +250,7 @@ def get_bounds( conformity_scores: NDArray, alpha_np: NDArray, ensemble: bool, - method: str + method: str, ) -> Tuple[NDArray, NDArray, NDArray]: """ Compute bounds of the prediction intervals from the observed values, @@ -314,12 +303,8 @@ def get_bounds( conformity_scores_up = self.get_estimation_distribution( X, y_pred_up, conformity_scores ) - bound_low = self.get_quantile( - conformity_scores_low, alpha_low, axis=1, method="lower" - ) - bound_up = self.get_quantile( - conformity_scores_up, alpha_up, axis=1, method="higher" - ) + bound_low = self.get_quantile(conformity_scores_low, alpha_low, axis=1, method="lower") + bound_up = self.get_quantile(conformity_scores_up, alpha_up, axis=1, method="higher") else: quantile_search = "higher" if self.sym else "lower" alpha_low = 1 - alpha_np if self.sym else alpha_np / 2 @@ -328,14 +313,8 @@ def get_bounds( quantile_low = self.get_quantile( conformity_scores, alpha_low, axis=0, method=quantile_search ) - quantile_up = self.get_quantile( - conformity_scores, alpha_up, axis=0, method="higher" - ) - bound_low = self.get_estimation_distribution( - X, y_pred_low, signed * quantile_low - ) - bound_up = self.get_estimation_distribution( - X, y_pred_up, quantile_up - ) + quantile_up = self.get_quantile(conformity_scores, alpha_up, axis=0, method="higher") + bound_low = self.get_estimation_distribution(X, y_pred_low, signed * quantile_low) + bound_up = self.get_estimation_distribution(X, y_pred_up, quantile_up) return y_pred, bound_low, bound_up diff --git a/mapie/conformity_scores/residual_conformity_scores.py b/mapie/conformity_scores/residual_conformity_scores.py index 798a206c..035712bc 100644 --- a/mapie/conformity_scores/residual_conformity_scores.py +++ b/mapie/conformity_scores/residual_conformity_scores.py @@ -6,9 +6,7 @@ from sklearn.linear_model import LinearRegression from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline -from sklearn.utils.validation import (check_is_fitted, - check_random_state, - indexable) +from sklearn.utils.validation import check_is_fitted, check_random_state, indexable from mapie._machine_precision import EPSILON from mapie._typing import ArrayLike, NDArray @@ -46,10 +44,7 @@ def get_signed_conformity_scores( return np.subtract(y, y_pred) def get_estimation_distribution( - self, - X: ArrayLike, - y_pred: ArrayLike, - conformity_scores: ArrayLike + self, X: ArrayLike, y_pred: ArrayLike, conformity_scores: ArrayLike ) -> NDArray: """ Compute samples of the estimation distribution from the predicted @@ -126,10 +121,7 @@ def get_signed_conformity_scores( return np.divide(np.subtract(y, y_pred), y_pred) def get_estimation_distribution( - self, - X: ArrayLike, - y_pred: ArrayLike, - conformity_scores: ArrayLike + self, X: ArrayLike, y_pred: ArrayLike, conformity_scores: ArrayLike ) -> NDArray: """ Compute samples of the estimation distribution from the predicted @@ -191,7 +183,7 @@ def __init__( split_size: Optional[Union[int, float]] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, sym: bool = True, - consistency_check: bool = False + consistency_check: bool = False, ) -> None: super().__init__(sym=sym, 
consistency_check=consistency_check) self.prefit = prefit @@ -199,9 +191,7 @@ def __init__( self.split_size = split_size self.random_state = random_state - def _check_estimator( - self, estimator: Optional[RegressorMixin] = None - ) -> RegressorMixin: + def _check_estimator(self, estimator: Optional[RegressorMixin] = None) -> RegressorMixin: """ Check if estimator is ``None``, and returns a ``LinearRegression`` instance if necessary. @@ -231,8 +221,7 @@ def _check_estimator( if estimator is None: return LinearRegression() else: - if not (hasattr(estimator, "fit") and - hasattr(estimator, "predict")): + if not (hasattr(estimator, "fit") and hasattr(estimator, "predict")): raise ValueError( "Invalid estimator. " "Please provide a regressor with fit and predict methods." @@ -245,12 +234,8 @@ def _check_estimator( return estimator def _check_parameters( - self, - X: ArrayLike, - y: ArrayLike, - y_pred: ArrayLike - ) -> Tuple[NDArray, NDArray, NDArray, RegressorMixin, - Union[int, np.random.RandomState]]: + self, X: ArrayLike, y: ArrayLike, y_pred: ArrayLike + ) -> Tuple[NDArray, NDArray, NDArray, RegressorMixin, Union[int, np.random.RandomState]]: """ Checks all the parameters of the class. Raises an error if the parameter are not well defined. @@ -276,9 +261,7 @@ def _check_parameters( - residual_estimator - random_state """ - residual_estimator = self._check_estimator( - self.residual_estimator - ) + residual_estimator = self._check_estimator(self.residual_estimator) random_state = check_random_state(self.random_state) X, y, y_pred = indexable(X, y, y_pred) X = np.array(X) @@ -314,19 +297,13 @@ def _fit_residual_estimator( Fitted residual estimator """ residuals = np.abs(np.subtract(y, y_pred)) - targets = np.log(np.maximum( - residuals, - np.full(residuals.shape, self.eps) - )) + targets = np.log(np.maximum(residuals, np.full(residuals.shape, self.eps))) residual_estimator_ = residual_estimator_.fit(X, targets) return residual_estimator_ - def _predict_residual_estimator( - self, - X: ArrayLike - ) -> NDArray: + def _predict_residual_estimator(self, X: ArrayLike) -> NDArray: """ Returns the predictions of the residual estimator. Raises a warning if the model predicts neagtive values. @@ -360,10 +337,7 @@ def _predict_residual_estimator( return pred def get_signed_conformity_scores( - self, - X: ArrayLike, - y: ArrayLike, - y_pred: ArrayLike + self, X: ArrayLike, y: ArrayLike, y_pred: ArrayLike ) -> NDArray: """ Computes the signed conformity score = (y - y_pred) / r_pred. @@ -374,13 +348,11 @@ def get_signed_conformity_scores( The learning is done with the log of the residual and later we use the exponential of the prediction to avoid negative values. 
""" - (X, y, y_pred, - self.residual_estimator_, - random_state) = self._check_parameters(X, y, y_pred) + (X, y, y_pred, self.residual_estimator_, random_state) = self._check_parameters( + X, y, y_pred + ) - full_indexes = np.argwhere( - np.logical_not(np.isnan(y_pred)) - ).reshape((-1,)) + full_indexes = np.argwhere(np.logical_not(np.isnan(y_pred))).reshape((-1,)) if not self.prefit: cal_indexes, res_indexes = train_test_split( @@ -390,37 +362,29 @@ def get_signed_conformity_scores( ) self.residual_estimator_ = self._fit_residual_estimator( clone(self.residual_estimator_), - X[res_indexes], y[res_indexes], y_pred[res_indexes] + X[res_indexes], + y[res_indexes], + y_pred[res_indexes], ) residuals_pred = np.maximum( - np.exp(self._predict_residual_estimator(X[cal_indexes])), - self.eps + np.exp(self._predict_residual_estimator(X[cal_indexes])), self.eps ) else: cal_indexes = full_indexes - residuals_pred = np.maximum( - self._predict_residual_estimator(X[cal_indexes]), - self.eps - ) + residuals_pred = np.maximum(self._predict_residual_estimator(X[cal_indexes]), self.eps) signed_conformity_scores = np.divide( - np.subtract(y[cal_indexes], y_pred[cal_indexes]), - residuals_pred + np.subtract(y[cal_indexes], y_pred[cal_indexes]), residuals_pred ) # reconstruct array with nan and conformity scores - complete_signed_cs = np.full( - y_pred.shape, fill_value=np.nan, dtype=float - ) + complete_signed_cs = np.full(y_pred.shape, fill_value=np.nan, dtype=float) complete_signed_cs[cal_indexes] = signed_conformity_scores return complete_signed_cs def get_estimation_distribution( - self, - X: ArrayLike, - y_pred: ArrayLike, - conformity_scores: ArrayLike + self, X: ArrayLike, y_pred: ArrayLike, conformity_scores: ArrayLike ) -> NDArray: """ Compute samples of the estimation distribution from the predicted @@ -435,9 +399,6 @@ def get_estimation_distribution( """ r_pred = self._predict_residual_estimator(X).reshape((-1, 1)) if not self.prefit: - return np.add( - y_pred, - np.multiply(conformity_scores, np.exp(r_pred)) - ) + return np.add(y_pred, np.multiply(conformity_scores, np.exp(r_pred))) else: return np.add(y_pred, np.multiply(conformity_scores, r_pred)) diff --git a/mapie/control_risk/crc_rcps.py b/mapie/control_risk/crc_rcps.py index 47c593a6..0831fe71 100644 --- a/mapie/control_risk/crc_rcps.py +++ b/mapie/control_risk/crc_rcps.py @@ -10,7 +10,7 @@ def get_r_hat_plus( method: Optional[str], bound: Optional[str], delta: Optional[float], - sigma_init: Optional[float] + sigma_init: Optional[float], ) -> Tuple[NDArray, NDArray]: """ Compute the upper bound of the loss for each lambda. 
@@ -64,89 +64,50 @@ def get_r_hat_plus( if (method == "rcps") and (delta is not None): if bound == "hoeffding": - r_hat_plus = ( - r_hat + - np.sqrt((1 / (2 * n_obs)) * np.log(1 / delta)) - ) + r_hat_plus = r_hat + np.sqrt((1 / (2 * n_obs)) * np.log(1 / delta)) elif bound == "bernstein": sigma_hat_bern = np.var(r_hat, axis=0, ddof=1) r_hat_plus = ( - r_hat + - np.sqrt((sigma_hat_bern * 2 * np.log(2 / delta)) / n_obs) + - (7 * np.log(2 / delta)) / (3 * (n_obs - 1)) + r_hat + + np.sqrt((sigma_hat_bern * 2 * np.log(2 / delta)) / n_obs) + + (7 * np.log(2 / delta)) / (3 * (n_obs - 1)) ) else: - mu_hat = ( - (.5 + np.cumsum(risks, axis=0)) / - (np.repeat( - [range(1, n_obs + 1)], - n_lambdas, - axis=0 - ).T + 1) + mu_hat = (0.5 + np.cumsum(risks, axis=0)) / ( + np.repeat([range(1, n_obs + 1)], n_lambdas, axis=0).T + 1 ) - sigma_hat = ( - (.25 + np.cumsum((risks - mu_hat)**2, axis=0)) / - (np.repeat( - [range(1, n_obs + 1)], - n_lambdas, - axis=0 - ).T + 1) + sigma_hat = (0.25 + np.cumsum((risks - mu_hat) ** 2, axis=0)) / ( + np.repeat([range(1, n_obs + 1)], n_lambdas, axis=0).T + 1 ) sigma_hat = np.concatenate( - [ - np.full( - (1, n_lambdas), fill_value=sigma_init - ), sigma_hat[:-1] - ] - ) - nu = np.minimum( - 1, - np.sqrt((2 * np.log(1 / delta)) / (n_obs * sigma_hat)) + [np.full((1, n_lambdas), fill_value=sigma_init), sigma_hat[:-1]] ) + nu = np.minimum(1, np.sqrt((2 * np.log(1 / delta)) / (n_obs * sigma_hat))) # Split the calculation in two to prevent memory issues - batches = [ - range(int(n_obs / 2)), - range(n_obs - int(n_obs / 2), n_obs) - ] + batches = [range(int(n_obs / 2)), range(n_obs - int(n_obs / 2), n_obs)] K_R_max = np.zeros((n_lambdas, n_lambdas)) for batch in batches: nu_batch = nu[batch] losses_batch = risks[batch] - nu_batch = np.repeat( - np.expand_dims(nu_batch, axis=2), - n_lambdas, - axis=2 - ) - losses_batch = np.repeat( - np.expand_dims(losses_batch, axis=2), - n_lambdas, - axis=2 - ) + nu_batch = np.repeat(np.expand_dims(nu_batch, axis=2), n_lambdas, axis=2) + losses_batch = np.repeat(np.expand_dims(losses_batch, axis=2), n_lambdas, axis=2) R = lambdas K_R = np.cumsum( - np.log( - ( - 1 - - nu_batch * - (losses_batch - R) - ) + - np.finfo(np.float64).eps - ), - axis=0 + np.log((1 - nu_batch * (losses_batch - R)) + np.finfo(np.float64).eps), axis=0 ) K_R = np.max(K_R, axis=0) K_R_max += K_R - r_hat_plus_tronc = lambdas[np.argwhere( - np.cumsum(K_R_max > -np.log(delta), axis=1) == 1 - )[:, 1]] + r_hat_plus_tronc = lambdas[ + np.argwhere(np.cumsum(K_R_max > -np.log(delta), axis=1) == 1)[:, 1] + ] r_hat_plus = np.ones(n_lambdas) - r_hat_plus[:len(r_hat_plus_tronc)] = r_hat_plus_tronc + r_hat_plus[: len(r_hat_plus_tronc)] = r_hat_plus_tronc else: r_hat_plus = (n_obs / (n_obs + 1)) * r_hat + (1 / (n_obs + 1)) @@ -154,11 +115,7 @@ def get_r_hat_plus( return r_hat, r_hat_plus -def find_lambda_star( - lambdas: NDArray, - r_hat_plus: NDArray, - alpha_np: NDArray -) -> NDArray: +def find_lambda_star(lambdas: NDArray, r_hat_plus: NDArray, alpha_np: NDArray) -> NDArray: """Find the higher value of lambda such that for all smaller lambda, the risk is smaller, for each value of alpha. 
@@ -196,20 +153,9 @@ def find_lambda_star( else: alphas_np = alpha_np - bound_rep = np.repeat( - np.expand_dims(r_hat_plus, axis=0), - len(alphas_np), - axis=0 - ) + bound_rep = np.repeat(np.expand_dims(r_hat_plus, axis=0), len(alphas_np), axis=0) bound_rep[:, np.argmax(bound_rep, axis=1)] = np.maximum( - alphas_np, - bound_rep[:, np.argmax(bound_rep, axis=1)] + alphas_np, bound_rep[:, np.argmax(bound_rep, axis=1)] ) # to avoid an error if the risk is always higher than alpha - lambdas_star = lambdas[np.argmin( - - np.greater_equal( - bound_rep, - alphas_np - ).astype(int), - axis=1 - )] + lambdas_star = lambdas[np.argmin(-np.greater_equal(bound_rep, alphas_np).astype(int), axis=1)] return lambdas_star diff --git a/mapie/control_risk/ltt.py b/mapie/control_risk/ltt.py index ac2e526d..b71a53d4 100644 --- a/mapie/control_risk/ltt.py +++ b/mapie/control_risk/ltt.py @@ -9,10 +9,7 @@ def ltt_procedure( - r_hat: NDArray, - alpha_np: NDArray, - delta: Optional[float], - n_obs: int + r_hat: NDArray, alpha_np: NDArray, delta: Optional[float], n_obs: int ) -> Tuple[List[List[Any]], NDArray]: """ Apply the Learn-Then-Test procedure for risk control. @@ -60,22 +57,19 @@ def ltt_procedure( """ if delta is None: raise ValueError( - "Invalid delta: delta cannot be None while" - + " controlling precision with LTT. " + "Invalid delta: delta cannot be None while" + " controlling precision with LTT. " ) p_values = compute_hoeffdding_bentkus_p_value(r_hat, n_obs, alpha_np) N = len(p_values) valid_index = [] for i in range(len(alpha_np)): - l_index = np.where(p_values[:, i] <= delta/N)[0].tolist() + l_index = np.where(p_values[:, i] <= delta / N)[0].tolist() valid_index.append(l_index) return valid_index, p_values def find_lambda_control_star( - r_hat: NDArray, - valid_index: List[List[Any]], - lambdas: NDArray + r_hat: NDArray, valid_index: List[List[Any]], lambdas: NDArray ) -> Tuple[ArrayLike, ArrayLike]: """ Return the lambda that give the minimum precision along diff --git a/mapie/control_risk/p_values.py b/mapie/control_risk/p_values.py index 981bf0bb..559cd2d4 100644 --- a/mapie/control_risk/p_values.py +++ b/mapie/control_risk/p_values.py @@ -8,9 +8,7 @@ def compute_hoeffdding_bentkus_p_value( - r_hat: NDArray, - n_obs: int, - alpha: Union[float, NDArray] + r_hat: NDArray, n_obs: int, alpha: Union[float, NDArray] ) -> NDArray: """ The method computes the p_values according to @@ -51,41 +49,18 @@ def compute_hoeffdding_bentkus_p_value( """ alpha_np = cast(NDArray, check_alpha(alpha)) alpha_np = alpha_np[:, np.newaxis] - r_hat_repeat = np.repeat( - np.expand_dims(r_hat, axis=1), - len(alpha_np), - axis=1 - ) - alpha_repeat = np.repeat( - alpha_np.reshape(1, -1), - len(r_hat), - axis=0 - ) + r_hat_repeat = np.repeat(np.expand_dims(r_hat, axis=1), len(alpha_np), axis=1) + alpha_repeat = np.repeat(alpha_np.reshape(1, -1), len(r_hat), axis=0) hoeffding_p_value = np.exp( - -n_obs * _h1( - np.where( - r_hat_repeat > alpha_repeat, - alpha_repeat, - r_hat_repeat - ), - alpha_repeat - ) - ) - bentkus_p_value = np.e * binom.cdf( - np.ceil(n_obs * r_hat_repeat), n_obs, alpha_repeat - ) - hb_p_value = np.where( - bentkus_p_value > hoeffding_p_value, - hoeffding_p_value, - bentkus_p_value + -n_obs + * _h1(np.where(r_hat_repeat > alpha_repeat, alpha_repeat, r_hat_repeat), alpha_repeat) ) + bentkus_p_value = np.e * binom.cdf(np.ceil(n_obs * r_hat_repeat), n_obs, alpha_repeat) + hb_p_value = np.where(bentkus_p_value > hoeffding_p_value, hoeffding_p_value, bentkus_p_value) return hb_p_value -def _h1( - r_hats: NDArray, - 
alphas: NDArray -) -> NDArray: +def _h1(r_hats: NDArray, alphas: NDArray) -> NDArray: """ This function allow us to compute the tighter version of hoeffding inequality. @@ -114,6 +89,6 @@ def _h1( ------- NDArray of shape a(n_lambdas, n_alpha). """ - elt1 = r_hats * np.log(r_hats/alphas) - elt2 = (1-r_hats) * np.log((1-r_hats)/(1-alphas)) + elt1 = r_hats * np.log(r_hats / alphas) + elt2 = (1 - r_hats) * np.log((1 - r_hats) / (1 - alphas)) return elt1 + elt2 diff --git a/mapie/control_risk/risks.py b/mapie/control_risk/risks.py index cd9aa9e5..f64808cb 100644 --- a/mapie/control_risk/risks.py +++ b/mapie/control_risk/risks.py @@ -6,11 +6,7 @@ from sklearn.utils.validation import column_or_1d -def compute_risk_recall( - lambdas: NDArray, - y_pred_proba: NDArray, - y: NDArray -) -> NDArray: +def compute_risk_recall(lambdas: NDArray, y_pred_proba: NDArray, y: NDArray) -> NDArray: """ In `MapieMultiLabelClassifier` when `metric_control=recall`, compute the recall per observation for each different @@ -43,32 +39,19 @@ def compute_risk_recall( "{} instead.".format(y_pred_proba.shape) ) if not np.array_equal(y_pred_proba.shape[:-1], y.shape): - raise ValueError( - "y and y_pred_proba could not be broadcast." - ) + raise ValueError("y and y_pred_proba could not be broadcast.") lambdas = cast(NDArray, column_or_1d(lambdas)) n_lambdas = len(lambdas) - y_pred_proba_repeat = np.repeat( - y_pred_proba, - n_lambdas, - axis=2 - ) + y_pred_proba_repeat = np.repeat(y_pred_proba, n_lambdas, axis=2) y_pred_th = (y_pred_proba_repeat > lambdas).astype(int) y_repeat = np.repeat(y[..., np.newaxis], n_lambdas, axis=2) - risks = 1 - ( - _true_positive(y_pred_th, y_repeat) / - y.sum(axis=1)[:, np.newaxis] - ) + risks = 1 - (_true_positive(y_pred_th, y_repeat) / y.sum(axis=1)[:, np.newaxis]) return risks -def compute_risk_precision( - lambdas: NDArray, - y_pred_proba: NDArray, - y: NDArray -) -> NDArray: +def compute_risk_precision(lambdas: NDArray, y_pred_proba: NDArray, y: NDArray) -> NDArray: """ In `MapieMultiLabelClassifier` when `metric_control=precision`, compute the precision per observation for each different @@ -101,31 +84,22 @@ def compute_risk_precision( "{} instead.".format(y_pred_proba.shape) ) if not np.array_equal(y_pred_proba.shape[:-1], y.shape): - raise ValueError( - "y and y_pred_proba could not be broadcast." - ) + raise ValueError("y and y_pred_proba could not be broadcast.") lambdas = cast(NDArray, column_or_1d(lambdas)) n_lambdas = len(lambdas) - y_pred_proba_repeat = np.repeat( - y_pred_proba, - n_lambdas, - axis=2 - ) + y_pred_proba_repeat = np.repeat(y_pred_proba, n_lambdas, axis=2) y_pred_th = (y_pred_proba_repeat > lambdas).astype(int) y_repeat = np.repeat(y[..., np.newaxis], n_lambdas, axis=2) - with np.errstate(divide='ignore', invalid="ignore"): - risks = 1 - _true_positive(y_pred_th, y_repeat)/y_pred_th.sum(axis=1) + with np.errstate(divide="ignore", invalid="ignore"): + risks = 1 - _true_positive(y_pred_th, y_repeat) / y_pred_th.sum(axis=1) risks[np.isnan(risks)] = 1 # nan value indicate high risks. return risks -def _true_positive( - y_pred_th: NDArray, - y_repeat: NDArray -) -> NDArray: +def _true_positive(y_pred_th: NDArray, y_repeat: NDArray) -> NDArray: """ Compute the number of true positive. 
diff --git a/mapie/estimator/estimator.py b/mapie/estimator/estimator.py index 33bda7b3..3931dfdd 100644 --- a/mapie/estimator/estimator.py +++ b/mapie/estimator/estimator.py @@ -7,12 +7,11 @@ from sklearn.base import RegressorMixin, clone from sklearn.model_selection import BaseCrossValidator, ShuffleSplit from sklearn.utils import _safe_indexing -from sklearn.utils.validation import (_num_samples, check_is_fitted) +from sklearn.utils.validation import _num_samples, check_is_fitted from mapie._typing import ArrayLike, NDArray from mapie.aggregation_functions import aggregate_all, phi2D -from mapie.utils import (check_nan_in_aposteriori_prediction, - fit_estimator) +from mapie.utils import check_nan_in_aposteriori_prediction, fit_estimator from mapie.estimator.interface import EnsembleEstimator @@ -146,6 +145,7 @@ class EnsembleRegressor(EnsembleEstimator): - Dummy array of folds containing each training sample, otherwise. Of shape (n_samples_train, cv.get_n_splits(X_train, y_train)). """ + no_agg_cv_ = ["prefit", "split"] no_agg_methods_ = ["naive", "base"] fit_attributes = [ @@ -163,7 +163,7 @@ def __init__( n_jobs: Optional[int], random_state: Optional[Union[int, np.random.RandomState]], test_size: Optional[Union[int, float]], - verbose: int + verbose: int, ): self.estimator = estimator self.method = method @@ -214,9 +214,7 @@ def _fit_oof_estimator( sample_weight = _safe_indexing(sample_weight, train_index) sample_weight = cast(NDArray, sample_weight) - estimator = fit_estimator( - estimator, X_train, y_train, sample_weight=sample_weight - ) + estimator = fit_estimator(estimator, X_train, y_train, sample_weight=sample_weight) return estimator @staticmethod @@ -251,11 +249,7 @@ def _predict_oof_estimator( y_pred = np.array([]) return y_pred, val_index - def _aggregate_with_mask( - self, - x: NDArray, - k: NDArray - ) -> NDArray: + def _aggregate_with_mask(self, x: NDArray, k: NDArray) -> NDArray: """ Take the array of predictions, made by the refitted estimators, on the testing set, and the 1-or-nan array indicating for each training @@ -311,9 +305,7 @@ def _pred_multi(self, X: ArrayLike) -> NDArray: ------- NDArray of shape (n_samples_test, n_samples_train) """ - y_pred_multi = np.column_stack( - [e.predict(X) for e in self.estimators_] - ) + y_pred_multi = np.column_stack([e.predict(X) for e in self.estimators_]) # At this point, y_pred_multi is of shape # (n_samples_test, n_estimators_). 
The method # ``_aggregate_with_mask`` fits it to the right size @@ -346,14 +338,13 @@ def predict_calib(self, X: ArrayLike) -> NDArray: cv = cast(BaseCrossValidator, self.cv) outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( delayed(self._predict_oof_estimator)( - estimator, X, calib_index, + estimator, + X, + calib_index, ) - for (_, calib_index), estimator in zip(cv.split(X), - self.estimators_) - ) - predictions, indices = map( - list, zip(*outputs) + for (_, calib_index), estimator in zip(cv.split(X), self.estimators_) ) + predictions, indices = map(list, zip(*outputs)) n_samples = _num_samples(X) pred_matrix = np.full( shape=(n_samples, cv.get_n_splits(X)), @@ -361,9 +352,7 @@ def predict_calib(self, X: ArrayLike) -> NDArray: dtype=float, ) for i, ind in enumerate(indices): - pred_matrix[ind, i] = np.array( - predictions[i], dtype=float - ) + pred_matrix[ind, i] = np.array(predictions[i], dtype=float) self.k_[ind, i] = 1 check_nan_in_aposteriori_prediction(pred_matrix) @@ -412,9 +401,7 @@ def fit( # Computation if cv == "prefit": single_estimator_ = estimator - self.k_ = np.full( - shape=(n_samples, 1), fill_value=np.nan, dtype=float - ) + self.k_ = np.full(shape=(n_samples, 1), fill_value=np.nan, dtype=float) else: single_estimator_ = self._fit_oof_estimator( clone(estimator), X, y, full_indexes, sample_weight @@ -443,10 +430,7 @@ def fit( return self def predict( - self, - X: ArrayLike, - ensemble: bool = False, - return_multi_pred: bool = True + self, X: ArrayLike, ensemble: bool = False, return_multi_pred: bool = True ) -> Union[NDArray, Tuple[NDArray, NDArray, NDArray]]: """ Predict target from X. It also computes the prediction per train sample diff --git a/mapie/estimator/interface.py b/mapie/estimator/interface.py index 468f3318..0f5e7d38 100644 --- a/mapie/estimator/interface.py +++ b/mapie/estimator/interface.py @@ -1,4 +1,5 @@ from __future__ import annotations +from abc import ABCMeta, abstractmethod from typing import Optional, Tuple, Union @@ -7,13 +8,14 @@ from mapie._typing import ArrayLike, NDArray -class EnsembleEstimator(RegressorMixin): +class EnsembleEstimator(RegressorMixin, metaclass=ABCMeta): """ This class implements methods to handle the training and usage of the estimator. This estimator can be unique or composed by cross validated estimators. """ + @abstractmethod def fit( self, X: ArrayLike, @@ -45,11 +47,9 @@ def fit( The estimator fitted. """ + @abstractmethod def predict( - self, - X: ArrayLike, - ensemble: bool = False, - return_multi_pred: bool = True + self, X: ArrayLike, ensemble: bool = False, return_multi_pred: bool = True ) -> Union[NDArray, Tuple[NDArray, NDArray, NDArray]]: """ Predict target from X. 
It also computes the prediction per train sample diff --git a/mapie/metrics.py b/mapie/metrics.py index 1b562880..8d512400 100644 --- a/mapie/metrics.py +++ b/mapie/metrics.py @@ -6,14 +6,16 @@ from sklearn.utils.validation import check_array, column_or_1d from ._typing import ArrayLike, NDArray -from .utils import (calc_bins, - check_array_shape_classification, - check_array_shape_regression, - check_binary_zero_one, - check_nb_intervals_sizes, - check_nb_sets_sizes, - check_number_bins, - check_split_strategy) +from .utils import ( + calc_bins, + check_array_shape_classification, + check_array_shape_regression, + check_binary_zero_one, + check_nb_intervals_sizes, + check_nb_sets_sizes, + check_number_bins, + check_split_strategy, +) from ._machine_precision import EPSILON @@ -55,16 +57,11 @@ def regression_coverage_score( y_true = cast(NDArray, column_or_1d(y_true)) y_pred_low = cast(NDArray, column_or_1d(y_pred_low)) y_pred_up = cast(NDArray, column_or_1d(y_pred_up)) - coverage = np.mean( - ((y_pred_low <= y_true) & (y_pred_up >= y_true)) - ) + coverage = np.mean(((y_pred_low <= y_true) & (y_pred_up >= y_true))) return float(coverage) -def classification_coverage_score( - y_true: ArrayLike, - y_pred_set: ArrayLike -) -> float: +def classification_coverage_score(y_true: ArrayLike, y_pred_set: ArrayLike) -> float: """ Effective coverage score obtained by the prediction sets. @@ -99,22 +96,12 @@ def classification_coverage_score( 0.8 """ y_true = cast(NDArray, column_or_1d(y_true)) - y_pred_set = cast( - NDArray, - check_array( - y_pred_set, force_all_finite=True, dtype=["bool"] - ) - ) - coverage = np.take_along_axis( - y_pred_set, y_true.reshape(-1, 1), axis=1 - ).mean() + y_pred_set = cast(NDArray, check_array(y_pred_set, force_all_finite=True, dtype=["bool"])) + coverage = np.take_along_axis(y_pred_set, y_true.reshape(-1, 1), axis=1).mean() return float(coverage) -def regression_mean_width_score( - y_pred_low: ArrayLike, - y_pred_up: ArrayLike -) -> float: +def regression_mean_width_score(y_pred_low: ArrayLike, y_pred_up: ArrayLike) -> float: """ Effective mean width score obtained by the prediction intervals. @@ -174,12 +161,7 @@ def classification_mean_width_score(y_pred_set: ArrayLike) -> float: >>> print(classification_mean_width_score(y_pred_set)) 2.0 """ - y_pred_set = cast( - NDArray, - check_array( - y_pred_set, force_all_finite=True, dtype=["bool"] - ) - ) + y_pred_set = cast(NDArray, check_array(y_pred_set, force_all_finite=True, dtype=["bool"])) mean_width = y_pred_set.sum(axis=1).mean() return float(mean_width) @@ -222,20 +204,13 @@ def expected_calibration_error( y_scores = cast(NDArray, y_scores) if np.size(y_scores.shape) == 2: - y_score = cast( - NDArray, column_or_1d(np.nanmax(y_scores, axis=1)) - ) + y_score = cast(NDArray, column_or_1d(np.nanmax(y_scores, axis=1))) else: y_score = cast(NDArray, column_or_1d(y_scores)) - _, bin_accs, bin_confs, bin_sizes = calc_bins( - y_true_, y_score, num_bins, split_strategy - ) + _, bin_accs, bin_confs, bin_sizes = calc_bins(y_true_, y_score, num_bins, split_strategy) - return np.divide( - np.sum(bin_sizes * np.abs(bin_accs - bin_confs)), - np.sum(bin_sizes) - ) + return np.divide(np.sum(bin_sizes * np.abs(bin_accs - bin_confs)), np.sum(bin_sizes)) def top_label_ece( @@ -283,23 +258,17 @@ def top_label_ece( float The ECE score adapted in the top label setting. """ - ece = float(0.) 
+ ece = float(0.0) split_strategy = check_split_strategy(split_strategy) num_bins = check_number_bins(num_bins) y_true = cast(NDArray, column_or_1d(y_true)) if y_score_arg is None: - y_score = cast( - NDArray, column_or_1d(np.nanmax(y_scores, axis=1)) - ) + y_score = cast(NDArray, column_or_1d(np.nanmax(y_scores, axis=1))) if classes is None: - y_score_arg = cast( - NDArray, column_or_1d(np.nanargmax(y_scores, axis=1)) - ) + y_score_arg = cast(NDArray, column_or_1d(np.nanargmax(y_scores, axis=1))) else: classes = cast(NDArray, classes) - y_score_arg = cast( - NDArray, column_or_1d(classes[np.nanargmax(y_scores, axis=1)]) - ) + y_score_arg = cast(NDArray, column_or_1d(classes[np.nanargmax(y_scores, axis=1)])) else: y_score = cast(NDArray, column_or_1d(y_scores)) y_score_arg = cast(NDArray, column_or_1d(y_score_arg)) @@ -309,10 +278,7 @@ def top_label_ece( label_ind = np.where(label == y_score_arg)[0] y_true_ = np.array(y_true[label_ind] == label, dtype=int) ece += expected_calibration_error( - y_true_, - y_scores=y_score[label_ind], - num_bins=num_bins, - split_strategy=split_strategy + y_true_, y_scores=y_score[label_ind], num_bins=num_bins, split_strategy=split_strategy ) ece /= len(labels) return ece @@ -352,17 +318,14 @@ def regression_coverage_score_v2( coverages = np.mean( np.logical_and( np.less_equal(y_intervals[:, 0, :], y_true), - np.greater_equal(y_intervals[:, 1, :], y_true) + np.greater_equal(y_intervals[:, 1, :], y_true), ), - axis=0 + axis=0, ) return coverages -def classification_coverage_score_v2( - y_true: NDArray, - y_pred_set: NDArray -) -> NDArray: +def classification_coverage_score_v2(y_true: NDArray, y_pred_set: NDArray) -> NDArray: """ Effective coverage score obtained by the prediction sets. @@ -390,18 +353,11 @@ def classification_coverage_score_v2( y_true = cast(NDArray, column_or_1d(y_true)) y_true = np.expand_dims(y_true, axis=1) y_true = np.expand_dims(y_true, axis=1) - coverage = np.nanmean( - np.take_along_axis(y_pred_set, y_true, axis=1), - axis=0 - ) + coverage = np.nanmean(np.take_along_axis(y_pred_set, y_true, axis=1), axis=0) return coverage[0] -def regression_ssc( - y_true: NDArray, - y_intervals: NDArray, - num_bins: int = 3 -) -> NDArray: +def regression_ssc(y_true: NDArray, y_intervals: NDArray, num_bins: int = 3) -> NDArray: """ Compute Size-Stratified Coverage metrics proposed in [3] that is the conditional coverage conditioned by the size of the intervals. 
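As context for the `regression_ssc` hunk that follows: the coverage helpers reformatted above (`regression_coverage_score_v2` and friends) all reduce to checking, per alpha, whether each true value falls inside its interval. A small self-contained sketch with made-up numbers (one alpha level), not part of the patch:

import numpy as np

y_true = np.array([1.0, 2.0, 3.0])
# intervals of shape (n_samples, 2, n_alpha): [:, 0, :] lower bounds, [:, 1, :] upper bounds
y_intervals = np.array([[[0.5], [1.5]],
                        [[1.0], [1.8]],
                        [[2.5], [3.5]]])
covered = (y_intervals[:, 0, :] <= y_true[:, None]) & (y_intervals[:, 1, :] >= y_true[:, None])
print(covered.mean(axis=0))   # [0.66666667] -> effective coverage per alpha

`regression_ssc` then bins the intervals by width and reports this coverage within each bin.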
@@ -454,21 +410,19 @@ def regression_ssc( indexes_bybins = np.array_split(indexes_sorted, num_bins, axis=0) coverages = np.zeros((y_intervals.shape[2], num_bins)) for i, indexes in enumerate(indexes_bybins): - intervals_binned = np.stack([ - np.take_along_axis(y_intervals[:, 0, :], indexes, axis=0), - np.take_along_axis(y_intervals[:, 1, :], indexes, axis=0) - ], axis=1) - coverages[:, i] = regression_coverage_score_v2(y_true[indexes], - intervals_binned) + intervals_binned = np.stack( + [ + np.take_along_axis(y_intervals[:, 0, :], indexes, axis=0), + np.take_along_axis(y_intervals[:, 1, :], indexes, axis=0), + ], + axis=1, + ) + coverages[:, i] = regression_coverage_score_v2(y_true[indexes], intervals_binned) return coverages -def regression_ssc_score( - y_true: NDArray, - y_intervals: NDArray, - num_bins: int = 3 -) -> NDArray: +def regression_ssc_score(y_true: NDArray, y_intervals: NDArray, num_bins: int = 3) -> NDArray: """ Aggregate by the minimum for each alpha the Size-Stratified Coverage [3]: returns the maximum violation of the conditional coverage @@ -509,9 +463,7 @@ def regression_ssc_score( def classification_ssc( - y_true: NDArray, - y_pred_set: NDArray, - num_bins: Union[int, None] = None + y_true: NDArray, y_pred_set: NDArray, num_bins: Union[int, None] = None ) -> NDArray: """ Compute Size-Stratified Coverage metrics proposed in [3] that is @@ -564,34 +516,24 @@ def classification_ssc( else: check_nb_sets_sizes(sizes, num_bins) check_number_bins(num_bins) - bins = [ - b[0] for b in np.array_split(range(n_classes + 1), num_bins) - ] + bins = [b[0] for b in np.array_split(range(n_classes + 1), num_bins)] digitized_sizes = np.digitize(sizes, bins) coverages = np.zeros((y_pred_set.shape[2], len(bins))) for alpha in range(y_pred_set.shape[2]): indexes_bybins = [ - np.argwhere(digitized_sizes[:, alpha] == i) - for i in range(1, len(bins)+1) + np.argwhere(digitized_sizes[:, alpha] == i) for i in range(1, len(bins) + 1) ] for i, indexes in enumerate(indexes_bybins): coverages[alpha, i] = classification_coverage_score_v2( - y_true[indexes], - np.take_along_axis( - y_pred_set[:, :, alpha], - indexes, - axis=0 - ) + y_true[indexes], np.take_along_axis(y_pred_set[:, :, alpha], indexes, axis=0) ) return coverages def classification_ssc_score( - y_true: NDArray, - y_pred_set: NDArray, - num_bins: Union[int, None] = None + y_true: NDArray, y_pred_set: NDArray, num_bins: Union[int, None] = None ) -> NDArray: """ Aggregate by the minimum for each alpha the Size-Stratified Coverage [3]: @@ -631,10 +573,7 @@ def classification_ssc_score( return np.nanmin(classification_ssc(y_true, y_pred_set, num_bins), axis=1) -def _gaussian_kernel( - x: NDArray, - kernel_size: int -) -> NDArray: +def _gaussian_kernel(x: NDArray, kernel_size: int) -> NDArray: """ Computes the gaussian kernel of x. (Used in hsic function) @@ -645,17 +584,12 @@ def _gaussian_kernel( kernel_size: int The variance (sigma), this coefficient controls the width of the curve. """ - norm_x = x ** 2 - dist = -2 * np.matmul(x, x.transpose((0, 2, 1))) \ - + norm_x + norm_x.transpose((0, 2, 1)) + norm_x = x**2 + dist = -2 * np.matmul(x, x.transpose((0, 2, 1))) + norm_x + norm_x.transpose((0, 2, 1)) return np.exp(-dist / kernel_size) -def hsic( - y_true: NDArray, - y_intervals: NDArray, - kernel_sizes: ArrayLike = (1, 1) -) -> NDArray: +def hsic(y_true: NDArray, y_intervals: NDArray, kernel_sizes: ArrayLike = (1, 1)) -> NDArray: """ Compute the square root of the hsic coefficient. 
HSIC is Hilbert-Schmidt independence criterion that is a correlation measure. Here we use it as @@ -711,32 +645,29 @@ def hsic( y_intervals = check_array_shape_regression(y_true, y_intervals) kernel_sizes = cast(NDArray, column_or_1d(kernel_sizes)) if len(kernel_sizes) != 2: - raise ValueError( - "kernel_sizes should be an ArrayLike of length 2" - ) + raise ValueError("kernel_sizes should be an ArrayLike of length 2") if (kernel_sizes <= 0).any(): - raise ValueError( - "kernel_size should be positive" - ) + raise ValueError("kernel_size should be positive") n_samples, _, n_alpha = y_intervals.shape y_true_per_alpha = np.tile(y_true, (n_alpha, 1)).transpose() widths = np.expand_dims( - np.abs(y_intervals[:, 1, :] - y_intervals[:, 0, :]).transpose(), - axis=2 + np.abs(y_intervals[:, 1, :] - y_intervals[:, 0, :]).transpose(), axis=2 ) cov_ind = np.expand_dims( np.int_( - ((y_intervals[:, 0, :] <= y_true_per_alpha) & - (y_intervals[:, 1, :] >= y_true_per_alpha)) + ( + (y_intervals[:, 0, :] <= y_true_per_alpha) + & (y_intervals[:, 1, :] >= y_true_per_alpha) + ) ).transpose(), - axis=2 + axis=2, ) k_mat = _gaussian_kernel(widths, kernel_sizes[0]) l_mat = _gaussian_kernel(cov_ind, kernel_sizes[1]) h_mat = np.eye(n_samples) - 1 / n_samples * np.ones((n_samples, n_samples)) hsic_mat = np.matmul(l_mat, np.matmul(h_mat, np.matmul(k_mat, h_mat))) - hsic_mat /= ((n_samples - 1) ** 2) + hsic_mat /= (n_samples - 1) ** 2 coef_hsic = np.sqrt(np.matrix.trace(hsic_mat, axis1=1, axis2=2)) return coef_hsic @@ -745,7 +676,7 @@ def hsic( def add_jitter( x: NDArray, noise_amplitude: float = 1e-8, - random_state: Optional[Union[int, np.random.RandomState]] = None + random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> NDArray: """ Add a tiny normal distributed perturbation to an array x. 
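The `hsic` function reformatted above measures dependence between two quantities it derives from the prediction intervals: the interval widths and the 0/1 coverage indicators. A toy sketch of just that derivation step (the kernel and centering algebra are left to the function itself), not part of the patch:

import numpy as np

y_true = np.array([1.0, 2.0, 3.0, 4.0])
low = np.array([0.5, 1.5, 2.0, 4.5])
up = np.array([1.5, 2.5, 3.5, 5.5])
widths = np.abs(up - low)                                  # interval widths
cov_ind = ((low <= y_true) & (up >= y_true)).astype(int)   # 1 if the interval covers y_true
print(widths, cov_ind)                                     # [1. 1. 1.5 1.] [1 1 1 0]

`hsic` then computes the (square-root) HSIC between these two arrays; a value near zero suggests the coverage indicator is nearly independent of the interval width.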
@@ -823,7 +754,7 @@ def cumulative_differences( y_true: NDArray, y_score: NDArray, noise_amplitude: float = 1e-8, - random_state: Optional[Union[int, np.random.RandomState]] = 1 + random_state: Optional[Union[int, np.random.RandomState]] = 1, ) -> NDArray: """ Compute the cumulative difference between y_true and y_score, both ordered @@ -874,12 +805,10 @@ def cumulative_differences( """ n = len(y_true) y_score_jittered = add_jitter( - y_score, - noise_amplitude=noise_amplitude, - random_state=random_state + y_score, noise_amplitude=noise_amplitude, random_state=random_state ) y_true_sorted, y_score_sorted = sort_xy_by_y(y_true, y_score_jittered) - cumulative_differences = np.cumsum(y_true_sorted - y_score_sorted)/n + cumulative_differences = np.cumsum(y_true_sorted - y_score_sorted) / n return cumulative_differences @@ -916,7 +845,7 @@ def length_scale(s: NDArray) -> float: 0.16 """ n = len(s) - length_scale = np.sqrt(np.sum(s * (1 - s)))/n + length_scale = np.sqrt(np.sum(s * (1 - s))) / n return length_scale @@ -1007,13 +936,11 @@ def kolmogorov_smirnov_cdf(x: float) -> float: >>> print(np.round(kolmogorov_smirnov_cdf(1), 4)) 0.3708 """ - kmax = np.ceil( - 0.5 + x * np.sqrt(2) / np.pi * np.sqrt(np.log(4 / (np.pi*EPSILON))) - ) + kmax = np.ceil(0.5 + x * np.sqrt(2) / np.pi * np.sqrt(np.log(4 / (np.pi * EPSILON)))) c = 0.0 for k in range(int(kmax)): kplus = k + 1 / 2 - c += (-1)**k / kplus * np.exp(-kplus**2 * np.pi**2 / (2 * x**2)) + c += (-1) ** k / kplus * np.exp(-(kplus**2) * np.pi**2 / (2 * x**2)) c *= 2 / np.pi return c @@ -1154,20 +1081,17 @@ def kuiper_cdf(x: float) -> float: """ kmax = np.ceil( ( - 0.5 + x / (np.pi * np.sqrt(2)) * - np.sqrt( - np.log( - 4 / (np.sqrt(2 * np.pi) * EPSILON) * (1 / x + x / np.pi**2) - ) - ) + 0.5 + + x + / (np.pi * np.sqrt(2)) + * np.sqrt(np.log(4 / (np.sqrt(2 * np.pi) * EPSILON) * (1 / x + x / np.pi**2))) ) ) c = 0.0 for k in range(int(kmax)): kplus = k + 1 / 2 - c += ( - (8 / x**2 + 2 / kplus**2 / np.pi**2) * - np.exp(-2 * kplus**2 * np.pi**2 / x**2) + c += (8 / x**2 + 2 / kplus**2 / np.pi**2) * np.exp( + -2 * kplus**2 * np.pi**2 / x**2 ) return c @@ -1261,15 +1185,9 @@ def spiegelhalter_statistic(y_true: NDArray, y_score: NDArray) -> float: """ y_true = column_or_1d(y_true) y_score = column_or_1d(y_score) - numerator = np.sum( - (y_true - y_score) * (1 - 2 * y_score) - ) - denominator = np.sqrt( - np.sum( - (1 - 2 * y_score) ** 2 * y_score * (1 - y_score) - ) - ) - sp_stat = numerator/denominator + numerator = np.sum((y_true - y_score) * (1 - 2 * y_score)) + denominator = np.sqrt(np.sum((1 - 2 * y_score) ** 2 * y_score * (1 - y_score))) + sp_stat = numerator / denominator return sp_stat diff --git a/mapie/multi_label_classification.py b/mapie/multi_label_classification.py index 2d6572b5..a6b3f5dd 100644 --- a/mapie/multi_label_classification.py +++ b/mapie/multi_label_classification.py @@ -10,8 +10,7 @@ from sklearn.multioutput import MultiOutputClassifier from sklearn.pipeline import Pipeline from sklearn.utils import check_random_state -from sklearn.utils.validation import (_check_y, _num_samples, check_is_fitted, - indexable) +from sklearn.utils.validation import _check_y, _num_samples, check_is_fitted, indexable from ._typing import ArrayLike, NDArray from .utils import check_alpha, check_n_jobs, check_verbose @@ -145,30 +144,25 @@ class MapieMultiLabelClassifier(BaseEstimator, ClassifierMixin): [False True False] [False True False]] """ - valid_methods_by_metric_ = { - "precision": ["ltt"], - "recall": ["rcps", "crc"] - } + + 
valid_methods_by_metric_ = {"precision": ["ltt"], "recall": ["rcps", "crc"]} valid_methods = list(chain(*valid_methods_by_metric_.values())) valid_metric_ = list(valid_methods_by_metric_.keys()) valid_bounds_ = ["hoeffding", "bernstein", "wsr", None] lambdas = np.arange(0, 1, 0.01) n_lambdas = len(lambdas) - fit_attributes = [ - "single_estimator_", - "risks" - ] + fit_attributes = ["single_estimator_", "risks"] sigma_init = 0.25 # Value given in the paper [1] - cal_size = .3 + cal_size = 0.3 def __init__( self, estimator: Optional[ClassifierMixin] = None, - metric_control: Optional[str] = 'recall', + metric_control: Optional[str] = "recall", method: Optional[str] = None, n_jobs: Optional[int] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, - verbose: int = 0 + verbose: int = 0, ) -> None: self.estimator = estimator self.metric_control = metric_control @@ -203,16 +197,15 @@ def _check_method(self) -> None: self.method = cast(str, self.method) self.metric_control = cast(str, self.metric_control) - if self.method not in self.valid_methods_by_metric_[ - self.metric_control - ]: + if self.method not in self.valid_methods_by_metric_[self.metric_control]: raise ValueError( "Invalid method for metric: " - + "You are controlling " + self.metric_control - + " and you are using invalid method: " + self.method - + ". Use instead: " + "".join(self.valid_methods_by_metric_[ - self.metric_control] - ) + + "You are controlling " + + self.metric_control + + " and you are using invalid method: " + + self.method + + ". Use instead: " + + "".join(self.valid_methods_by_metric_[self.metric_control]) ) def _check_all_labelled(self, y: NDArray) -> None: @@ -233,9 +226,7 @@ def _check_all_labelled(self, y: NDArray) -> None: """ if not (y.sum(axis=1) > 0).all(): raise ValueError( - "Invalid y. " - "All observations should contain at " - "least one label." + "Invalid y. " "All observations should contain at " "least one label." ) def _check_delta(self, delta: Optional[float]): @@ -259,10 +250,7 @@ def _check_delta(self, delta: Optional[float]): If delta is not ``None`` and method is CRC """ if (not isinstance(delta, float)) and (delta is not None): - raise ValueError( - "Invalid delta. " - f"delta must be a float, not a {type(delta)}" - ) + raise ValueError("Invalid delta. " f"delta must be a float, not a {type(delta)}") if (self.method == "rcps") or (self.method == "ltt"): if delta is None: raise ValueError( @@ -270,11 +258,8 @@ def _check_delta(self, delta: Optional[float]): "delta cannot be ``None`` when controlling " "Recall with RCPS or Precision with LTT" ) - elif ((delta <= 0) or (delta >= 1)): - raise ValueError( - "Invalid delta. " - "delta must be in ]0, 1[" - ) + elif (delta <= 0) or (delta >= 1): + raise ValueError("Invalid delta. " "delta must be in ]0, 1[") if (self.method == "crc") and (delta is not None): warnings.warn( "WARNING: you are using crc method, hence " @@ -294,7 +279,8 @@ def _check_valid_index(self, alpha: NDArray): if self.valid_index[i] == []: warnings.warn( "Warning: LTT method has returned an empty sequence" - + " for alpha=" + str(alpha[i]) + + " for alpha=" + + str(alpha[i]) ) def _check_estimator( @@ -353,14 +339,12 @@ def _check_estimator( "use partial_fit." 
) if (estimator is None) and (_refit): - estimator = MultiOutputClassifier( - LogisticRegression(multi_class="multinomial") - ) + estimator = MultiOutputClassifier(LogisticRegression(multi_class="multinomial")) X_train, X_calib, y_train, y_calib = train_test_split( - X, - y, - test_size=self.calib_size, - random_state=self.random_state, + X, + y, + test_size=self.calib_size, + random_state=self.random_state, ) estimator.fit(X_train, y_train) warnings.warn( @@ -417,9 +401,7 @@ def _check_bound(self, bound: Optional[str]): If bound is not ``None``and method is CRC """ if bound not in self.valid_bounds_: - raise ValueError( - "bound must be in ['hoeffding', 'bernstein', 'wsr', ``None``]" - ) + raise ValueError("bound must be in ['hoeffding', 'bernstein', 'wsr', ``None``]") elif (bound is not None) and (self.method == "crc"): warnings.warn( "WARNING: you are using crc method, hence " @@ -441,8 +423,7 @@ def _check_metric_control(self): if self.metric_control not in self.valid_metric_: raise ValueError( "Invalid metric. " - "Allowed scores must be in the following list " - + ", ".join(self.valid_metric_) + "Allowed scores must be in the following list " + ", ".join(self.valid_metric_) ) if self.method is None: @@ -451,10 +432,7 @@ def _check_metric_control(self): else: # self.metric_control == "precision" self.method = "ltt" - def _transform_pred_proba( - self, - y_pred_proba: Union[Sequence[NDArray], NDArray] - ) -> NDArray: + def _transform_pred_proba(self, y_pred_proba: Union[Sequence[NDArray], NDArray]) -> NDArray: """If the output of the predict_proba is a list of arrays (output of the ``predict_proba`` of ``MultiOutputClassifier``) we transform it into an array of shape (n_samples, n_classes, 1), otherwise, we add @@ -473,10 +451,7 @@ def _transform_pred_proba( if isinstance(y_pred_proba, np.ndarray): y_pred_proba_array = y_pred_proba else: - y_pred_proba_stacked = np.stack( - y_pred_proba, # type: ignore - axis=0 - )[:, :, 1] + y_pred_proba_stacked = np.stack(y_pred_proba, axis=0)[:, :, 1] # type: ignore y_pred_proba_array = np.moveaxis(y_pred_proba_stacked, 0, -1) return np.expand_dims(y_pred_proba_array, axis=2) @@ -518,10 +493,7 @@ def partial_fit( X, y = indexable(X, y) _check_y(y, multi_output=True) - estimator, X, y = self._check_estimator( - X, y, self.estimator, - _refit - ) + estimator, X, y = self._check_estimator(X, y, self.estimator, _refit) y = cast(NDArray, y) X = cast(NDArray, X) @@ -537,13 +509,9 @@ def partial_fit( self.theta_ = X.shape[1] if self.metric_control == "recall": - self.risks = compute_risk_recall( - self.lambdas, y_pred_proba_array, y - ) + self.risks = compute_risk_recall(self.lambdas, y_pred_proba_array, y) else: # self.metric_control == "precision" - self.risks = compute_risk_precision( - self.lambdas, y_pred_proba_array, y - ) + self.risks = compute_risk_precision(self.lambdas, y_pred_proba_array, y) else: if X.shape[1] != self.theta_: msg = "Number of features %d does not match previous data %d." 
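Since this file mostly shows internal checks, a rough end-to-end usage sketch may help situate them. It follows the signatures visible in this patch (`fit(X, y)`, `predict(X, alpha=...)` with CRC, so no `delta`); names and defaults not shown in the patch are assumptions, not taken from it:

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.multioutput import MultiOutputClassifier
from mapie.multi_label_classification import MapieMultiLabelClassifier

rng = np.random.RandomState(0)
X = rng.normal(size=(100, 4))
y = (X[:, :3] > 0).astype(int)
y[y.sum(axis=1) == 0, 0] = 1   # every observation needs at least one label (see _check_all_labelled)

clf = MultiOutputClassifier(LogisticRegression()).fit(X, y)
mapie = MapieMultiLabelClassifier(estimator=clf, metric_control="recall", method="crc")
mapie.fit(X, y)
y_pred, y_pred_set = mapie.predict(X, alpha=0.1)   # CRC controls recall; delta is not needed
print(y_pred_set.shape)                            # (100, 3, 1): one set per sample, label and alpha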
@@ -552,26 +520,15 @@ def partial_fit( y_pred_proba = self.single_estimator_.predict_proba(X) y_pred_proba_array = self._transform_pred_proba(y_pred_proba) if self.metric_control == "recall": - partial_risk = compute_risk_recall( - self.lambdas, - y_pred_proba_array, - y - ) + partial_risk = compute_risk_recall(self.lambdas, y_pred_proba_array, y) else: # self.metric_control == "precision" - partial_risk = compute_risk_precision( - self.lambdas, - y_pred_proba_array, - y - ) + partial_risk = compute_risk_precision(self.lambdas, y_pred_proba_array, y) self.risks = np.concatenate([self.risks, partial_risk], axis=0) return self def fit( - self, - X: ArrayLike, - y: ArrayLike, - calib_size: Optional[float] = .3 + self, X: ArrayLike, y: ArrayLike, calib_size: Optional[float] = 0.3 ) -> MapieMultiLabelClassifier: """ Fit the base estimator or use the fitted base estimator. @@ -603,7 +560,7 @@ def predict( X: ArrayLike, alpha: Optional[Union[float, Iterable[float]]] = None, delta: Optional[float] = None, - bound: Optional[Union[str, None]] = None + bound: Optional[Union[str, None]] = None, ) -> Union[NDArray, Tuple[NDArray, NDArray]]: """ Prediction sets on new samples based on target confidence @@ -665,12 +622,8 @@ def predict( y_pred_proba = self.single_estimator_.predict_proba(X) y_pred_proba_array = self._transform_pred_proba(y_pred_proba) - y_pred_proba_array = np.repeat( - y_pred_proba_array, - len(alpha_np), - axis=2 - ) - if self.metric_control == 'precision': + y_pred_proba_array = np.repeat(y_pred_proba_array, len(alpha_np), axis=2) + if self.metric_control == "precision": self.n_obs = len(self.risks) self.r_hat = self.risks.mean(axis=0) self.valid_index, self.p_values = ltt_procedure( @@ -678,23 +631,16 @@ def predict( ) self._check_valid_index(alpha_np) self.lambdas_star, self.r_star = find_lambda_control_star( - self.r_hat, self.valid_index, self.lambdas + self.r_hat, self.valid_index, self.lambdas ) y_pred_proba_array = ( - y_pred_proba_array > - np.array(self.lambdas_star)[np.newaxis, np.newaxis, :] + y_pred_proba_array > np.array(self.lambdas_star)[np.newaxis, np.newaxis, :] ) else: self.r_hat, self.r_hat_plus = get_r_hat_plus( - self.risks, self.lambdas, self.method, - bound, delta, self.sigma_init - ) - self.lambdas_star = find_lambda_star( - self.lambdas, self.r_hat_plus, alpha_np - ) - y_pred_proba_array = ( - y_pred_proba_array > - self.lambdas_star[np.newaxis, np.newaxis, :] + self.risks, self.lambdas, self.method, bound, delta, self.sigma_init ) + self.lambdas_star = find_lambda_star(self.lambdas, self.r_hat_plus, alpha_np) + y_pred_proba_array = y_pred_proba_array > self.lambdas_star[np.newaxis, np.newaxis, :] return y_pred, y_pred_proba_array diff --git a/mapie/regression/__init__.py b/mapie/regression/__init__.py index 16243ace..a6e1a6f6 100644 --- a/mapie/regression/__init__.py +++ b/mapie/regression/__init__.py @@ -2,8 +2,4 @@ from .quantile_regression import MapieQuantileRegressor from .time_series_regression import MapieTimeSeriesRegressor -__all__ = [ - "MapieRegressor", - "MapieQuantileRegressor", - "MapieTimeSeriesRegressor" -] +__all__ = ["MapieRegressor", "MapieQuantileRegressor", "MapieTimeSeriesRegressor"] diff --git a/mapie/regression/quantile_regression.py b/mapie/regression/quantile_regression.py index 6fe95c9f..2b69b026 100644 --- a/mapie/regression/quantile_regression.py +++ b/mapie/regression/quantile_regression.py @@ -9,15 +9,18 @@ from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline from sklearn.utils import 
check_random_state -from sklearn.utils.validation import (_check_y, _num_samples, check_is_fitted, - indexable) +from sklearn.utils.validation import _check_y, _num_samples, check_is_fitted, indexable from mapie._compatibility import np_quantile from mapie._typing import ArrayLike, NDArray -from mapie.utils import (check_alpha_and_n_samples, - check_defined_variables_predict_cqr, - check_estimator_fit_predict, check_lower_upper_bounds, - check_null_weight, fit_estimator) +from mapie.utils import ( + check_alpha_and_n_samples, + check_defined_variables_predict_cqr, + check_estimator_fit_predict, + check_lower_upper_bounds, + check_null_weight, + fit_estimator, +) from .regression import MapieRegressor @@ -116,6 +119,7 @@ class MapieQuantileRegressor(MapieRegressor): >>> print(y_pred) [ 5. 7. 9. 11. 13. 15.] """ + valid_methods_ = ["quantile"] fit_attributes = [ "estimators_", @@ -124,32 +128,16 @@ class MapieQuantileRegressor(MapieRegressor): ] quantile_estimator_params = { - "GradientBoostingRegressor": { - "loss_name": "loss", - "alpha_name": "alpha" - }, - "QuantileRegressor": { - "loss_name": "quantile", - "alpha_name": "quantile" - }, - "HistGradientBoostingRegressor": { - "loss_name": "loss", - "alpha_name": "quantile" - }, - "LGBMRegressor": { - "loss_name": "objective", - "alpha_name": "alpha" - }, + "GradientBoostingRegressor": {"loss_name": "loss", "alpha_name": "alpha"}, + "QuantileRegressor": {"loss_name": "quantile", "alpha_name": "quantile"}, + "HistGradientBoostingRegressor": {"loss_name": "loss", "alpha_name": "quantile"}, + "LGBMRegressor": {"loss_name": "objective", "alpha_name": "alpha"}, } def __init__( self, estimator: Optional[ - Union[ - RegressorMixin, - Pipeline, - List[Union[RegressorMixin, Pipeline]] - ] + Union[RegressorMixin, Pipeline, List[Union[RegressorMixin, Pipeline]]] ] = None, method: str = "quantile", cv: Optional[str] = None, @@ -205,15 +193,11 @@ def _check_alpha( ) if isinstance(alpha, float): if np.any(np.logical_or(alpha <= 0, alpha >= 1.0)): - raise ValueError( - "Invalid alpha. Allowed values are between 0.0 and 1.0." - ) + raise ValueError("Invalid alpha. Allowed values are between 0.0 and 1.0.") else: alpha_np = np.array([alpha / 2, 1 - alpha / 2, 0.5]) else: - raise ValueError( - "Invalid alpha. Allowed values are float." - ) + raise ValueError("Invalid alpha. Allowed values are float.") return alpha_np def _check_estimator( @@ -279,9 +263,7 @@ def _check_estimator( else: if name_estimator in self.quantile_estimator_params: param_estimator = estimator.get_params() - loss_name, alpha_name = self.quantile_estimator_params[ - name_estimator - ].values() + loss_name, alpha_name = self.quantile_estimator_params[name_estimator].values() if loss_name in param_estimator: if param_estimator[loss_name] != "quantile": raise ValueError( @@ -300,8 +282,7 @@ def _check_estimator( ) else: raise ValueError( - "The matching parameter `loss_name` for" - + " estimator does not exist." + "The matching parameter `loss_name` for" + " estimator does not exist." ) else: raise ValueError( @@ -309,14 +290,10 @@ def _check_estimator( + " by MapieQuantileRegressor. \n" "Give a base model among: \n" f"{self.quantile_estimator_params.keys()} " - "Or, add your base model to" - + " ``quantile_estimator_params``." + "Or, add your base model to" + " ``quantile_estimator_params``." ) - def _check_cv( - self, - cv: Optional[str] = None - ) -> str: + def _check_cv(self, cv: Optional[str] = None) -> str: """ Check if cv argument is ``None``, ``"split"`` or ``"prefit"``. 
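The `_check_alpha` hunk above shows where the three internal quantile levels come from: for a single miscoverage level `alpha` the regressor fits one model per level `alpha/2`, `1 - alpha/2` and `0.5`, and `quantile_estimator_params` tells it which constructor arguments to set on the base model. A tiny standalone illustration (not part of the patch) using scikit-learn's gradient boosting, which the mapping above lists with `loss_name="loss"` and `alpha_name="alpha"`:

from sklearn.ensemble import GradientBoostingRegressor

alpha = 0.2
levels = [alpha / 2, 1 - alpha / 2, 0.5]   # [0.1, 0.9, 0.5]: lower, upper, median
models = [GradientBoostingRegressor(loss="quantile", alpha=level) for level in levels]
print([m.alpha for m in models])           # roughly what the cloned sub-estimators look like internally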
@@ -342,9 +319,7 @@ def _check_cv( if cv in ("split", "prefit"): return cv else: - raise ValueError( - "Invalid cv method, only valid method is ``split``." - ) + raise ValueError("Invalid cv method, only valid method is ``split``.") def _check_calib_set( self, @@ -357,9 +332,7 @@ def _check_calib_set( random_state: Optional[Union[int, np.random.RandomState, None]] = None, shuffle: Optional[bool] = True, stratify: Optional[ArrayLike] = None, - ) -> Tuple[ - ArrayLike, ArrayLike, ArrayLike, ArrayLike, Optional[ArrayLike] - ]: + ) -> Tuple[ArrayLike, ArrayLike, ArrayLike, ArrayLike, Optional[ArrayLike]]: """ Check if a calibration set has already been defined, if not, then we define one using the ``train_test_split`` method. @@ -385,30 +358,23 @@ def _check_calib_set( if X_calib is None or y_calib is None: if sample_weight is None: X_train, X_calib, y_train, y_calib = train_test_split( - X, - y, - test_size=calib_size, - random_state=random_state, - shuffle=shuffle, - stratify=stratify + X, + y, + test_size=calib_size, + random_state=random_state, + shuffle=shuffle, + stratify=stratify, ) sample_weight_train = sample_weight else: - ( - X_train, - X_calib, - y_train, - y_calib, - sample_weight_train, - _, - ) = train_test_split( - X, - y, - sample_weight, - test_size=calib_size, - random_state=random_state, - shuffle=shuffle, - stratify=stratify + (X_train, X_calib, y_train, y_calib, sample_weight_train, _,) = train_test_split( + X, + y, + sample_weight, + test_size=calib_size, + random_state=random_state, + shuffle=shuffle, + stratify=stratify, ) else: X_train, y_train, sample_weight_train = X, y, sample_weight @@ -444,9 +410,7 @@ def _check_prefit_params( accordingly with the prefit estimators. """ if isinstance(estimator, Iterable) is False: - raise ValueError( - "Estimator for prefit must be an iterable object." 
- ) + raise ValueError("Estimator for prefit must be an iterable object.") if len(estimator) == 3: for est in estimator: check_estimator_fit_predict(est) @@ -549,10 +513,7 @@ def fit( X_calib, y_calib = indexable(X, y) self.n_calib_samples = _num_samples(y_calib) - y_calib_preds = np.full( - shape=(3, self.n_calib_samples), - fill_value=np.nan - ) + y_calib_preds = np.full(shape=(3, self.n_calib_samples), fill_value=np.nan) for i, est in enumerate(estimator): self.estimators_.append(est) y_calib_preds[i] = est.predict(X_calib).ravel() @@ -582,25 +543,18 @@ def fit( self.n_calib_samples = _num_samples(y_calib) check_alpha_and_n_samples(self.alpha, self.n_calib_samples) sample_weight_train, X_train, y_train = check_null_weight( - sample_weight_train, - X_train, - y_train + sample_weight_train, X_train, y_train ) y_train = cast(NDArray, y_train) - y_calib_preds = np.full( - shape=(3, self.n_calib_samples), - fill_value=np.nan - ) + y_calib_preds = np.full(shape=(3, self.n_calib_samples), fill_value=np.nan) if isinstance(checked_estimator, Pipeline): estimator = checked_estimator[-1] else: estimator = checked_estimator name_estimator = estimator.__class__.__name__ - alpha_name = self.quantile_estimator_params[ - name_estimator - ]["alpha_name"] + alpha_name = self.quantile_estimator_params[name_estimator]["alpha_name"] for i, alpha_ in enumerate(alpha): cloned_estimator_ = clone(checked_estimator) params = {alpha_name: alpha_} @@ -608,23 +562,17 @@ def fit( cloned_estimator_[-1].set_params(**params) else: cloned_estimator_.set_params(**params) - self.estimators_.append(fit_estimator( - cloned_estimator_, X_train, y_train, sample_weight_train - )) + self.estimators_.append( + fit_estimator(cloned_estimator_, X_train, y_train, sample_weight_train) + ) y_calib_preds[i] = self.estimators_[-1].predict(X_calib) self.single_estimator_ = self.estimators_[2] - self.conformity_scores_ = np.full( - shape=(3, self.n_calib_samples), - fill_value=np.nan - ) + self.conformity_scores_ = np.full(shape=(3, self.n_calib_samples), fill_value=np.nan) self.conformity_scores_[0] = y_calib_preds[0] - y_calib self.conformity_scores_[1] = y_calib - y_calib_preds[1] self.conformity_scores_[2] = np.max( - [ - self.conformity_scores_[0], - self.conformity_scores_[1] - ], axis=0 + [self.conformity_scores_[0], self.conformity_scores_[1]], axis=0 ) return self @@ -672,7 +620,7 @@ def predict( """ check_is_fitted(self, self.fit_attributes) check_defined_variables_predict_cqr(ensemble, alpha) - alpha = self.alpha if symmetry else self.alpha/2 + alpha = self.alpha if symmetry else self.alpha / 2 check_alpha_and_n_samples(alpha, self.n_calib_samples) n = self.n_calib_samples @@ -686,21 +634,12 @@ def predict( for i, est in enumerate(self.estimators_): y_preds[i] = est.predict(X) if symmetry: - quantile = np.full( - 2, - np_quantile( - self.conformity_scores_[2], q, method="higher" - ) - ) + quantile = np.full(2, np_quantile(self.conformity_scores_[2], q, method="higher")) else: quantile = np.array( [ - np_quantile( - self.conformity_scores_[0], q, method="higher" - ), - np_quantile( - self.conformity_scores_[1], q, method="higher" - ) + np_quantile(self.conformity_scores_[0], q, method="higher"), + np_quantile(self.conformity_scores_[1], q, method="higher"), ] ) y_pred_low = y_preds[0][:, np.newaxis] - quantile[0] diff --git a/mapie/regression/regression.py b/mapie/regression/regression.py index 35619315..cdfe8312 100644 --- a/mapie/regression/regression.py +++ b/mapie/regression/regression.py @@ -8,17 +8,22 @@ from 
sklearn.model_selection import BaseCrossValidator from sklearn.pipeline import Pipeline from sklearn.utils import check_random_state -from sklearn.utils.validation import (_check_y, check_is_fitted, - indexable) +from sklearn.utils.validation import _check_y, check_is_fitted, indexable from mapie._typing import ArrayLike, NDArray -from mapie.conformity_scores import (ConformityScore, - ResidualNormalisedScore) +from mapie.conformity_scores import ConformityScore, ResidualNormalisedScore from mapie.estimator.estimator import EnsembleRegressor -from mapie.utils import (check_alpha, check_alpha_and_n_samples, - check_conformity_score, check_cv, - check_estimator_fit_predict, check_n_features_in, - check_n_jobs, check_null_weight, check_verbose) +from mapie.utils import ( + check_alpha, + check_alpha_and_n_samples, + check_conformity_score, + check_cv, + check_estimator_fit_predict, + check_n_features_in, + check_n_jobs, + check_null_weight, + check_verbose, +) class MapieRegressor(BaseEstimator, RegressorMixin): @@ -252,9 +257,7 @@ def _check_parameters(self) -> None: check_verbose(self.verbose) check_random_state(self.random_state) - def _check_method( - self, method: str - ) -> str: + def _check_method(self, method: str) -> str: """ Check if ``method`` is correct. @@ -274,15 +277,11 @@ def _check_method( If ``method`` is not in ``self.valid_methods_``. """ if method not in self.valid_methods_: - raise ValueError( - f"Invalid method. Allowed values are {self.valid_methods_}." - ) + raise ValueError(f"Invalid method. Allowed values are {self.valid_methods_}.") else: return method - def _check_agg_function( - self, agg_function: Optional[str] = None - ) -> Optional[str]: + def _check_agg_function(self, agg_function: Optional[str] = None) -> Optional[str]: """ Check if ``agg_function`` is correct, and consistent with other arguments. @@ -308,9 +307,7 @@ def _check_agg_function( "Invalid aggregation function " f"Allowed values are '{self.valid_agg_functions_}'." ) - elif (agg_function is None) and ( - type(self.cv).__name__ in self.cv_need_agg_function_ - ): + elif (agg_function is None) and (type(self.cv).__name__ in self.cv_need_agg_function_): raise ValueError( "You need to specify an aggregation function when " f"cv's type is in {self.cv_need_agg_function_}." @@ -320,9 +317,7 @@ def _check_agg_function( else: return "mean" - def _check_estimator( - self, estimator: Optional[RegressorMixin] = None - ) -> RegressorMixin: + def _check_estimator(self, estimator: Optional[RegressorMixin] = None) -> RegressorMixin: """ Check if estimator is ``None``, and returns a ``LinearRegression`` instance if necessary. 
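Most of the `MapieRegressor` hunks in this file are pure reformatting; for orientation, here is a short usage sketch of the public API whose internals are being reshuffled. It is a plausible minimal example, not taken from the patch, and assumes the usual estimator and cv defaults:

import numpy as np
from sklearn.linear_model import LinearRegression
from mapie.regression import MapieRegressor

rng = np.random.RandomState(0)
X = rng.uniform(0, 10, size=(200, 1))
y = 2 * X.ravel() + rng.normal(scale=0.5, size=200)

mapie = MapieRegressor(estimator=LinearRegression(), cv=5)
mapie.fit(X, y)
y_pred, y_pis = mapie.predict(X, alpha=0.1)
print(y_pis.shape)   # (200, 2, 1): lower/upper bound per sample, per alpha (see the predict hunk below)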
@@ -361,7 +356,8 @@ def _check_estimator( return estimator def _check_ensemble( - self, ensemble: bool, + self, + ensemble: bool, ) -> None: """ Check if ``ensemble`` is ``False`` and if ``self.agg_function`` @@ -416,18 +412,16 @@ def _check_fit_parameters( """ # Checking self._check_parameters() - cv = check_cv( - self.cv, test_size=self.test_size, random_state=self.random_state - ) + cv = check_cv(self.cv, test_size=self.test_size, random_state=self.random_state) if self.cv in ["split", "prefit"] and self.method != "base": self.method = "base" estimator = self._check_estimator(self.estimator) agg_function = self._check_agg_function(self.agg_function) - cs_estimator = check_conformity_score( - self.conformity_score - ) - if isinstance(cs_estimator, ResidualNormalisedScore) and \ - self.cv not in ["split", "prefit"]: + cs_estimator = check_conformity_score(self.conformity_score) + if isinstance(cs_estimator, ResidualNormalisedScore) and self.cv not in [ + "split", + "prefit", + ]: raise ValueError( "The ResidualNormalisedScore can be used only with " "``cv='split'`` and ``cv='prefit'``" @@ -487,13 +481,15 @@ def fit( The model itself. """ # Checks - (estimator, - self.conformity_score_function_, - agg_function, - cv, - X, - y, - sample_weight) = self._check_fit_parameters(X, y, sample_weight) + ( + estimator, + self.conformity_score_function_, + agg_function, + cv, + X, + y, + sample_weight, + ) = self._check_fit_parameters(X, y, sample_weight) self.estimator_ = EnsembleRegressor( estimator, @@ -503,17 +499,16 @@ def fit( self.n_jobs, self.random_state, self.test_size, - self.verbose + self.verbose, ) # Fit the prediction function self.estimator_ = self.estimator_.fit(X, y, sample_weight) y_pred = self.estimator_.predict_calib(X) # Compute the conformity scores (manage jk-ab case) - self.conformity_scores_ = \ - self.conformity_score_function_.get_conformity_scores( - X, y, y_pred - ) + self.conformity_scores_ = self.conformity_score_function_.get_conformity_scores( + X, y, y_pred + ) return self @@ -576,9 +571,7 @@ def predict( alpha = cast(Optional[NDArray], check_alpha(alpha)) if alpha is None: - y_pred = self.estimator_.predict( - X, ensemble, return_multi_pred=False - ) + y_pred = self.estimator_.predict(X, ensemble, return_multi_pred=False) return np.array(y_pred) else: @@ -586,13 +579,7 @@ def predict( alpha_np = cast(NDArray, alpha) check_alpha_and_n_samples(alpha_np, n) - y_pred, y_pred_low, y_pred_up = \ - self.conformity_score_function_.get_bounds( - X, - self.estimator_, - self.conformity_scores_, - alpha_np, - ensemble, - self.method - ) + y_pred, y_pred_low, y_pred_up = self.conformity_score_function_.get_bounds( + X, self.estimator_, self.conformity_scores_, alpha_np, ensemble, self.method + ) return np.array(y_pred), np.stack([y_pred_low, y_pred_up], axis=1) diff --git a/mapie/regression/time_series_regression.py b/mapie/regression/time_series_regression.py index 9e7354b9..3b20bd90 100644 --- a/mapie/regression/time_series_regression.py +++ b/mapie/regression/time_series_regression.py @@ -32,8 +32,7 @@ class MapieTimeSeriesRegressor(MapieRegressor): https://arxiv.org/abs/2010.09107 """ - cv_need_agg_function_ = MapieRegressor.cv_need_agg_function_ \ - + ["BlockBootstrap"] + cv_need_agg_function_ = MapieRegressor.cv_need_agg_function_ + ["BlockBootstrap"] valid_methods_ = ["enbpi"] def __init__( @@ -53,7 +52,7 @@ def __init__( n_jobs=n_jobs, agg_function=agg_function, verbose=verbose, - random_state=random_state + random_state=random_state, ) def _relative_conformity_scores( 
@@ -109,9 +108,7 @@ def _beta_optimize( If lower and upper bounds arrays don't have the same shape. """ if lower_bounds.shape != upper_bounds.shape: - raise ValueError( - "Lower and upper bounds arrays should have the same shape." - ) + raise ValueError("Lower and upper bounds arrays should have the same shape.") alpha = cast(NDArray, alpha) betas_0 = np.full( shape=(len(lower_bounds), len(alpha)), @@ -138,9 +135,7 @@ def _beta_optimize( axis=1, method="lower", ) - betas_0[:, ind_alpha] = betas[ - np.argmin(one_alpha_beta - beta, axis=0) - ] + betas_0[:, ind_alpha] = betas[np.argmin(one_alpha_beta - beta, axis=0)] return betas_0 @@ -222,12 +217,8 @@ def partial_fit( "number of training instances." ) new_conformity_scores_ = self._relative_conformity_scores(X, y) - self.conformity_scores_ = np.roll( - self.conformity_scores_, -len(new_conformity_scores_) - ) - self.conformity_scores_[ - -len(new_conformity_scores_): - ] = new_conformity_scores_ + self.conformity_scores_ = np.roll(self.conformity_scores_, -len(new_conformity_scores_)) + self.conformity_scores_[-len(new_conformity_scores_) :] = new_conformity_scores_ return self def predict( @@ -316,8 +307,7 @@ def predict( self.lower_quantiles_ = lower_quantiles self.higher_quantiles_ = higher_quantiles - if self.method in self.no_agg_methods_ \ - or self.cv in self.no_agg_cv_: + if self.method in self.no_agg_methods_ or self.cv in self.no_agg_cv_: y_pred_low = y_pred[:, np.newaxis] + lower_quantiles y_pred_up = y_pred[:, np.newaxis] + higher_quantiles else: @@ -335,9 +325,8 @@ def predict( def _more_tags(self): return { - "_xfail_checks": - { - "check_estimators_partial_fit_n_features": - "partial_fit can only be called on fitted models" + "_xfail_checks": { + "check_estimators_partial_fit_n_features": "partial_fit can " + "only be called on fitted models" } } diff --git a/mapie/subsample.py b/mapie/subsample.py index 804326fd..f8e09ecf 100644 --- a/mapie/subsample.py +++ b/mapie/subsample.py @@ -55,9 +55,7 @@ def __init__( self.replace = replace self.random_state = random_state - def split( - self, X: NDArray - ) -> Generator[Tuple[NDArray, NDArray], None, None]: + def split(self, X: NDArray) -> Generator[Tuple[NDArray, NDArray], None, None]: """ Generate indices to split data into training and test sets. @@ -74,9 +72,7 @@ def split( The testing set indices for that split. """ indices = np.arange(_num_samples(X)) - n_samples = ( - self.n_samples if self.n_samples is not None else len(indices) - ) + n_samples = self.n_samples if self.n_samples is not None else len(indices) random_state = check_random_state(self.random_state) for k in range(self.n_resamplings): train_index = resample( @@ -153,9 +149,7 @@ def __init__( self.overlapping = overlapping self.random_state = random_state - def split( - self, X: NDArray - ) -> Generator[Tuple[NDArray, NDArray], None, None]: + def split(self, X: NDArray) -> Generator[Tuple[NDArray, NDArray], None, None]: """ Generate indices to split data into training and test sets. 
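One behavioural detail worth keeping in mind while reading the reformatted `partial_fit` above (time_series_regression.py): the conformity scores act as a fixed-size rolling buffer, with `np.roll` shifting the oldest scores out and the newest ones written into the tail. A standalone sketch of that two-line idiom, not part of the patch:

import numpy as np

conformity_scores_ = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
new_scores = np.array([9.0, 8.0])
conformity_scores_ = np.roll(conformity_scores_, -len(new_scores))
conformity_scores_[-len(new_scores):] = new_scores
print(conformity_scores_)   # [3. 4. 5. 9. 8.]: oldest scores dropped, newest appended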
@@ -177,16 +171,13 @@ def split( """ if (self.n_blocks is not None) + (self.length is not None) != 1: raise ValueError( - "Exactly one argument between ``length`` or " - "``n_blocks`` has to be not None" + "Exactly one argument between ``length`` or " "``n_blocks`` has to be not None" ) n = len(X) if self.n_blocks is not None: - length = ( - self.length if self.length is not None else n // self.n_blocks - ) + length = self.length if self.length is not None else n // self.n_blocks n_blocks = self.n_blocks else: length = cast(int, self.length) @@ -195,18 +186,15 @@ def split( indices = np.arange(n) if (length <= 0) or (length > n): raise ValueError( - "The length of blocks is <= 0 or greater than the length" - "of training set." + "The length of blocks is <= 0 or greater than the length" "of training set." ) if self.overlapping: blocks = sliding_window_view(indices, window_shape=length) else: - indices = indices[(n % length):] + indices = indices[(n % length) :] blocks_number = n // length - blocks = np.asarray( - np.array_split(indices, indices_or_sections=blocks_number) - ) + blocks = np.asarray(np.array_split(indices, indices_or_sections=blocks_number)) random_state = check_random_state(self.random_state) @@ -218,12 +206,8 @@ def split( random_state=random_state, stratify=None, ) - train_index = np.concatenate( - [blocks[k] for k in block_indices], axis=0 - ) - test_index = np.array( - list(set(indices) - set(train_index)), dtype=np.int64 - ) + train_index = np.concatenate([blocks[k] for k in block_indices], axis=0) + test_index = np.array(list(set(indices) - set(train_index)), dtype=np.int64) yield train_index, test_index def get_n_splits(self, *args: Any, **kargs: Any) -> int: diff --git a/mapie/tests/test_calibration.py b/mapie/tests/test_calibration.py index 288f7616..bbd7e4e2 100644 --- a/mapie/tests/test_calibration.py +++ b/mapie/tests/test_calibration.py @@ -20,9 +20,7 @@ random_state = 20 -CALIBRATORS = [ - "sigmoid", "isotonic", _SigmoidCalibration(), LinearRegression() -] +CALIBRATORS = ["sigmoid", "isotonic", _SigmoidCalibration(), LinearRegression()] ESTIMATORS = [ LogisticRegression(), @@ -50,10 +48,10 @@ [np.nan, np.nan, 0.85714286], [np.nan, np.nan, 0.85714286], [np.nan, np.nan, 0.85714286], - [0.83333333, np.nan, np.nan] + [0.83333333, np.nan, np.nan], ], - "top_label_ece": 0.31349206349206343 - } + "top_label_ece": 0.31349206349206343, + }, } results_binary = { @@ -84,24 +82,17 @@ [0.85714286, np.nan], [0.85714286, np.nan], [np.nan, 0.85714286], - [np.nan, 0.85714286] + [np.nan, 0.85714286], ], "top_label_ece": 0.1428571428571429, - "ece": 0.3571428571428571 + "ece": 0.3571428571428571, }, } -X, y = make_classification( - n_samples=20, - n_classes=3, - n_informative=4, - random_state=random_state -) +X, y = make_classification(n_samples=20, n_classes=3, n_informative=4, random_state=random_state) -X_, X_test, y_, y_test = train_test_split( - X, y, test_size=0.33, random_state=random_state -) +X_, X_test, y_, y_test = train_test_split(X, y, test_size=0.33, random_state=random_state) X_train, X_calib, y_train, y_calib = train_test_split( X_, y_, test_size=0.33, random_state=random_state ) @@ -123,26 +114,11 @@ def test_default_parameters() -> None: def test_default_fit_params() -> None: """Test default sample weights and other parameters.""" mapie_cal = MapieCalibrator() - assert ( - signature(mapie_cal.fit).parameters["sample_weight"].default - is None - ) - assert ( - signature(mapie_cal.fit).parameters["calib_size"].default - == 0.33 - ) - assert ( - 
signature(mapie_cal.fit).parameters["random_state"].default - is None - ) - assert ( - signature(mapie_cal.fit).parameters["shuffle"].default - is True - ) - assert ( - signature(mapie_cal.fit).parameters["stratify"].default - is None - ) + assert signature(mapie_cal.fit).parameters["sample_weight"].default is None + assert signature(mapie_cal.fit).parameters["calib_size"].default == 0.33 + assert signature(mapie_cal.fit).parameters["random_state"].default is None + assert signature(mapie_cal.fit).parameters["shuffle"].default is True + assert signature(mapie_cal.fit).parameters["stratify"].default is None def test_false_str_estimator() -> None: @@ -151,9 +127,7 @@ def test_false_str_estimator() -> None: ValueError, match=r".*Please provide a string in*", ): - mapie_cal = MapieCalibrator( - calibrator="not_estimator" - ) + mapie_cal = MapieCalibrator(calibrator="not_estimator") mapie_cal.fit(X, y) @@ -162,8 +136,7 @@ def test_estimator_none() -> None: mapie_cal = MapieCalibrator() mapie_cal.fit(X, y) assert isinstance( - mapie_cal.calibrators[list(mapie_cal.calibrators.keys())[0]], - _SigmoidCalibration + mapie_cal.calibrators[list(mapie_cal.calibrators.keys())[0]], _SigmoidCalibration ) @@ -172,10 +145,7 @@ def test_check_type_of_target() -> None: X = [0.5, 0.2, 0.4, 0.8, 3.8] y = [0.4, 0.2, 3.6, 3, 0.2] mapie_cal = MapieCalibrator() - with pytest.raises( - ValueError, - match=r".*Make sure to have one of the allowed targets:*" - ): + with pytest.raises(ValueError, match=r".*Make sure to have one of the allowed targets:*"): mapie_cal.fit(X, y) @@ -215,18 +185,12 @@ def test_invalid_cv_argument(cv: str) -> None: def test_prefit_split_same_results() -> None: """Test that prefit and split method return the same result""" - est = RandomForestClassifier( - random_state=random_state - ).fit(X_train, y_train) + est = RandomForestClassifier(random_state=random_state).fit(X_train, y_train) mapie_cal_prefit = MapieCalibrator(estimator=est, cv="prefit") mapie_cal_prefit.fit(X_calib, y_calib) - mapie_cal_split = MapieCalibrator( - estimator=RandomForestClassifier(random_state=random_state) - ) - mapie_cal_split.fit( - X_, y_, random_state=random_state - ) + mapie_cal_split = MapieCalibrator(estimator=RandomForestClassifier(random_state=random_state)) + mapie_cal_split.fit(X_, y_, random_state=random_state) y_prefit = mapie_cal_prefit.predict_proba(X_test) y_split = mapie_cal_split.predict_proba(X_test) np.testing.assert_allclose(y_split, y_prefit) @@ -237,10 +201,7 @@ def test_not_seen_calibrator() -> None: Test that there is a warning if no calibration occurs due to no calibrator for this class. """ - with pytest.warns( - UserWarning, - match=r".*WARNING: This predicted label*" - ): + with pytest.warns(UserWarning, match=r".*WARNING: This predicted label*"): mapie_cal = MapieCalibrator() mapie_cal.fit(X, y) mapie_cal.calibrators.clear() @@ -250,8 +211,7 @@ def test_not_seen_calibrator() -> None: @pytest.mark.parametrize("calibrator", CALIBRATORS) @pytest.mark.parametrize("estimator", ESTIMATORS) def test_shape_of_output( - calibrator: Union[str, RegressorMixin], - estimator: ClassifierMixin + calibrator: Union[str, RegressorMixin], estimator: ClassifierMixin ) -> None: """Test that the size of the outputs are coherent.""" mapie_cal = MapieCalibrator( @@ -269,11 +229,7 @@ def test_number_of_classes_equal_calibrators() -> None: of classes in the calibration step. 
""" mapie_cal = MapieCalibrator() - mapie_cal.fit( - X=X_, - y=y_, - random_state=random_state - ) + mapie_cal.fit(X=X_, y=y_, random_state=random_state) y_pred_calib_set = mapie_cal.single_estimator_.predict(X=X_calib) assert len(mapie_cal.calibrators) == len(np.unique(y_pred_calib_set)) @@ -281,22 +237,12 @@ def test_number_of_classes_equal_calibrators() -> None: def test_same_predict() -> None: """Test that the same prediction is made regardless of the calibration.""" mapie_cal = MapieCalibrator(method="top_label") - mapie_cal.fit( - X=X_, - y=y_, - random_state=random_state - ) + mapie_cal.fit(X=X_, y=y_, random_state=random_state) y_pred_calib_set = mapie_cal.single_estimator_.predict(X=X_test) y_pred_calib_set_through_predict = mapie_cal.predict(X=X_test) - y_pred_calibrated_test_set = np.nanargmax( - mapie_cal.predict_proba(X=X_test), - axis=1 - ) + y_pred_calibrated_test_set = np.nanargmax(mapie_cal.predict_proba(X=X_test), axis=1) np.testing.assert_allclose(y_pred_calib_set, y_pred_calibrated_test_set) - np.testing.assert_allclose( - y_pred_calib_set, - y_pred_calib_set_through_predict - ) + np.testing.assert_allclose(y_pred_calib_set, y_pred_calib_set_through_predict) @pytest.mark.parametrize("cv", MapieCalibrator.valid_cv) @@ -306,20 +252,14 @@ def test_correct_results(cv: str) -> None: in the correct scores (in a multi-class setting). """ mapie_cal = MapieCalibrator(cv=cv) - mapie_cal.fit( - X=X_, - y=y_, - random_state=random_state - ) + mapie_cal.fit(X=X_, y=y_, random_state=random_state) pred_ = mapie_cal.predict_proba(X_test) top_label_ece_ = top_label_ece(y_test, pred_) np.testing.assert_array_almost_equal( results[cv]["y_score"], pred_ # type:ignore ) np.testing.assert_allclose( # type:ignore - results[cv]["top_label_ece"], - top_label_ece_, - rtol=1e-2 + results[cv]["top_label_ece"], top_label_ece_, rtol=1e-2 ) @@ -330,17 +270,10 @@ def test_correct_results_binary(cv: str) -> None: in the correct scores (in a binary setting). """ X_binary, y_binary = make_classification( - n_samples=10, - n_classes=2, - n_informative=4, - random_state=random_state + n_samples=10, n_classes=2, n_informative=4, random_state=random_state ) mapie_cal = MapieCalibrator(cv=cv) - mapie_cal.fit( - X=X_binary, - y=y_binary, - random_state=random_state - ) + mapie_cal.fit(X=X_binary, y=y_binary, random_state=random_state) pred_ = mapie_cal.predict_proba(X_binary) top_label_ece_ = top_label_ece(y_binary, pred_) ece = expected_calibration_error(y_binary, pred_) @@ -348,14 +281,10 @@ def test_correct_results_binary(cv: str) -> None: results_binary[cv]["y_score"], pred_ # type:ignore ) np.testing.assert_allclose( # type:ignore - results_binary[cv]["top_label_ece"], - top_label_ece_, - rtol=1e-2 + results_binary[cv]["top_label_ece"], top_label_ece_, rtol=1e-2 ) np.testing.assert_allclose( # type:ignore - results_binary[cv]["ece"], - ece, - rtol=1e-2 + results_binary[cv]["ece"], ece, rtol=1e-2 ) @@ -365,10 +294,7 @@ def test_different_binary_y_combinations() -> None: scores are always the same. 
""" X_comb, y_comb = make_classification( - n_samples=20, - n_classes=3, - n_informative=4, - random_state=random_state + n_samples=20, n_classes=3, n_informative=4, random_state=random_state ) mapie_cal = MapieCalibrator() mapie_cal.fit(X_comb, y_comb, random_state=random_state) @@ -385,24 +311,16 @@ def test_different_binary_y_combinations() -> None: y_score2 = mapie_cal2.predict_proba(X_comb) np.testing.assert_array_almost_equal(y_score, y_score1) np.testing.assert_array_almost_equal(y_score, y_score2) - assert top_label_ece( - y_comb, y_score, classes=mapie_cal.classes_ - ) == top_label_ece( + assert top_label_ece(y_comb, y_score, classes=mapie_cal.classes_) == top_label_ece( y_comb1, y_score1, classes=mapie_cal1.classes_ ) - assert top_label_ece( - y_comb, y_score, classes=mapie_cal.classes_ - ) == top_label_ece( + assert top_label_ece(y_comb, y_score, classes=mapie_cal.classes_) == top_label_ece( y_comb2, y_score2, classes=mapie_cal2.classes_ ) -@pytest.mark.parametrize( - "calibrator", [LinearRegression(), "isotonic"] -) -def test_results_with_constant_sample_weights( - calibrator: Union[str, RegressorMixin] -) -> None: +@pytest.mark.parametrize("calibrator", [LinearRegression(), "isotonic"]) +def test_results_with_constant_sample_weights(calibrator: Union[str, RegressorMixin]) -> None: """ Test predictions when sample weights are None or constant with different values. @@ -416,14 +334,8 @@ def test_results_with_constant_sample_weights( mapie_clf1 = MapieCalibrator(estimator=estimator, calibrator=calibrator) mapie_clf2 = MapieCalibrator(estimator=estimator, calibrator=calibrator) mapie_clf0.fit(X, y, sample_weight=None, random_state=random_state) - mapie_clf1.fit( - X, y, sample_weight=np.ones(shape=n_samples), - random_state=random_state - ) - mapie_clf2.fit( - X, y, sample_weight=np.ones(shape=n_samples) * 5, - random_state=random_state - ) + mapie_clf1.fit(X, y, sample_weight=np.ones(shape=n_samples), random_state=random_state) + mapie_clf2.fit(X, y, sample_weight=np.ones(shape=n_samples) * 5, random_state=random_state) y_pred0 = mapie_clf0.predict_proba(X) y_pred1 = mapie_clf1.predict_proba(X) y_pred2 = mapie_clf2.predict_proba(X) @@ -446,15 +358,10 @@ def test_pipeline_compatibility() -> None: ] ) categorical_preprocessor = Pipeline( - steps=[ - ("encoding", OneHotEncoder(handle_unknown="ignore")) - ] + steps=[("encoding", OneHotEncoder(handle_unknown="ignore"))] ) preprocessor = ColumnTransformer( - [ - ("cat", categorical_preprocessor, ["x_cat"]), - ("num", numeric_preprocessor, ["x_num"]) - ] + [("cat", categorical_preprocessor, ["x_cat"]), ("num", numeric_preprocessor, ["x_num"])] ) pipe = make_pipeline(preprocessor, LogisticRegression()) pipe.fit(X, y) diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index 583770ac..f10985d0 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -30,48 +30,15 @@ WRONG_METHODS = ["scores", "cumulated", "test", "", 1, 2.5, (1, 2)] WRONG_INCLUDE_LABELS = ["randomised", "True", "False", "other", 1, 2.5, (1, 2)] Y_PRED_PROBA_WRONG = [ - np.array( - [ - [0.8, 0.01, 0.1, 0.05], - [1.0, 0.1, 0.0, 0.0] - ] - ), - np.array( - [ - [1.0, 0.0001, 0.0] - ] - ), - np.array( - [ - [0.8, 0.1, 0.05, 0.05], - [0.9, 0.01, 0.04, 0.06] - ] - ), - np.array( - [ - [0.8, 0.1, 0.02, 0.05], - [0.9, 0.01, 0.03, 0.06] - ] - ) + np.array([[0.8, 0.01, 0.1, 0.05], [1.0, 0.1, 0.0, 0.0]]), + np.array([[1.0, 0.0001, 0.0]]), + np.array([[0.8, 0.1, 0.05, 0.05], [0.9, 0.01, 0.04, 0.06]]), + 
np.array([[0.8, 0.1, 0.02, 0.05], [0.9, 0.01, 0.03, 0.06]]), ] Y_TRUE_PROBA_PLACE = [ - [ - np.array([2, 0]), - np.array([ - [.1, .3, .6], - [.2, .7, .1] - ]), - np.array([[0], [1]]) - ], - [ - np.array([1, 0]), - np.array([ - [.7, .12, .18], - [.5, .24, .26] - ]), - np.array([[2], [0]]) - ] + [np.array([2, 0]), np.array([[0.1, 0.3, 0.6], [0.2, 0.7, 0.1]]), np.array([[0], [1]])], + [np.array([1, 0]), np.array([[0.7, 0.12, 0.18], [0.5, 0.24, 0.26]]), np.array([[2], [0]])], ] Params = TypedDict( @@ -80,391 +47,163 @@ "method": str, "cv": Optional[Union[int, str]], "test_size": Optional[Union[int, float]], - "random_state": Optional[int] - } + "random_state": Optional[int], + }, ) ParamsPredict = TypedDict( - "ParamsPredict", - { - "include_last_label": Union[bool, str], - "agg_scores": str - } + "ParamsPredict", {"include_last_label": Union[bool, str], "agg_scores": str} ) STRATEGIES = { "lac": ( - Params( - method="lac", - cv="prefit", - test_size=None, - random_state=random_state - ), - ParamsPredict( - include_last_label=False, - agg_scores="mean" - ) + Params(method="lac", cv="prefit", test_size=None, random_state=random_state), + ParamsPredict(include_last_label=False, agg_scores="mean"), ), "lac_split": ( - Params( - method="lac", - cv="split", - test_size=0.5, - random_state=random_state - ), - ParamsPredict( - include_last_label=False, - agg_scores="mean" - ) + Params(method="lac", cv="split", test_size=0.5, random_state=random_state), + ParamsPredict(include_last_label=False, agg_scores="mean"), ), "lac_cv_mean": ( - Params( - method="lac", - cv=3, - test_size=None, - random_state=random_state - ), - ParamsPredict( - include_last_label=False, - agg_scores="mean" - ) + Params(method="lac", cv=3, test_size=None, random_state=random_state), + ParamsPredict(include_last_label=False, agg_scores="mean"), ), "lac_cv_crossval": ( - Params( - method="lac", - cv=3, - test_size=None, - random_state=random_state - ), - ParamsPredict( - include_last_label=False, - agg_scores="crossval" - ) + Params(method="lac", cv=3, test_size=None, random_state=random_state), + ParamsPredict(include_last_label=False, agg_scores="crossval"), ), "aps_include": ( - Params( - method="aps", - cv="prefit", - test_size=None, - random_state=random_state - ), - ParamsPredict( - include_last_label=True, - agg_scores="mean" - ) + Params(method="aps", cv="prefit", test_size=None, random_state=random_state), + ParamsPredict(include_last_label=True, agg_scores="mean"), ), "aps_not_include": ( - Params( - method="aps", - cv="prefit", - test_size=None, - random_state=random_state - ), - ParamsPredict( - include_last_label=False, - agg_scores="mean" - ) + Params(method="aps", cv="prefit", test_size=None, random_state=random_state), + ParamsPredict(include_last_label=False, agg_scores="mean"), ), "aps_randomized": ( - Params( - method="aps", - cv="prefit", - test_size=None, - random_state=random_state - ), - ParamsPredict( - include_last_label="randomized", - agg_scores="mean" - ) + Params(method="aps", cv="prefit", test_size=None, random_state=random_state), + ParamsPredict(include_last_label="randomized", agg_scores="mean"), ), "aps_include_split": ( - Params( - method="aps", - cv="split", - test_size=0.5, - random_state=random_state - ), - ParamsPredict( - include_last_label=True, - agg_scores="mean" - ) + Params(method="aps", cv="split", test_size=0.5, random_state=random_state), + ParamsPredict(include_last_label=True, agg_scores="mean"), ), "aps_not_include_split": ( - Params( - method="aps", - cv="split", - 
test_size=0.5, - random_state=random_state - ), - ParamsPredict( - include_last_label=False, - agg_scores="mean" - ) + Params(method="aps", cv="split", test_size=0.5, random_state=random_state), + ParamsPredict(include_last_label=False, agg_scores="mean"), ), "aps_randomized_split": ( - Params( - method="aps", - cv="split", - test_size=0.5, - random_state=random_state - ), - ParamsPredict( - include_last_label="randomized", - agg_scores="mean" - ) + Params(method="aps", cv="split", test_size=0.5, random_state=random_state), + ParamsPredict(include_last_label="randomized", agg_scores="mean"), ), "aps_include_cv_mean": ( - Params( - method="aps", - cv=3, - test_size=None, - random_state=random_state - ), - ParamsPredict( - include_last_label=True, - agg_scores="mean" - ) + Params(method="aps", cv=3, test_size=None, random_state=random_state), + ParamsPredict(include_last_label=True, agg_scores="mean"), ), "aps_not_include_cv_mean": ( - Params( - method="aps", - cv=3, - test_size=None, - random_state=random_state - ), - ParamsPredict( - include_last_label=False, - agg_scores="mean" - ) + Params(method="aps", cv=3, test_size=None, random_state=random_state), + ParamsPredict(include_last_label=False, agg_scores="mean"), ), "aps_randomized_cv_mean": ( - Params( - method="aps", - cv=3, - test_size=None, - random_state=random_state - ), - ParamsPredict( - include_last_label="randomized", - agg_scores="mean" - ) + Params(method="aps", cv=3, test_size=None, random_state=random_state), + ParamsPredict(include_last_label="randomized", agg_scores="mean"), ), "aps_include_cv_crossval": ( - Params( - method="aps", - cv=3, - test_size=None, - random_state=random_state - ), - ParamsPredict( - include_last_label=True, - agg_scores="crossval" - ) + Params(method="aps", cv=3, test_size=None, random_state=random_state), + ParamsPredict(include_last_label=True, agg_scores="crossval"), ), "aps_not_include_cv_crossval": ( - Params( - method="aps", - cv=3, - test_size=None, - random_state=random_state - ), - ParamsPredict( - include_last_label=False, - agg_scores="crossval" - ) + Params(method="aps", cv=3, test_size=None, random_state=random_state), + ParamsPredict(include_last_label=False, agg_scores="crossval"), ), "aps_randomized_cv_crossval": ( - Params( - method="aps", - cv=3, - test_size=None, - random_state=random_state - ), - ParamsPredict( - include_last_label="randomized", - agg_scores="crossval" - ) + Params(method="aps", cv=3, test_size=None, random_state=random_state), + ParamsPredict(include_last_label="randomized", agg_scores="crossval"), ), "naive": ( - Params( - method="naive", - cv="prefit", - test_size=None, - random_state=random_state - ), - ParamsPredict( - include_last_label=True, - agg_scores="mean" - ) + Params(method="naive", cv="prefit", test_size=None, random_state=random_state), + ParamsPredict(include_last_label=True, agg_scores="mean"), ), "naive_split": ( - Params( - method="naive", - cv="split", - test_size=0.5, - random_state=random_state - ), - ParamsPredict( - include_last_label=True, - agg_scores="mean" - ) + Params(method="naive", cv="split", test_size=0.5, random_state=random_state), + ParamsPredict(include_last_label=True, agg_scores="mean"), ), "top_k": ( - Params( - method="top_k", - cv="prefit", - test_size=None, - random_state=random_state - ), - ParamsPredict( - include_last_label=True, - agg_scores="mean" - ) + Params(method="top_k", cv="prefit", test_size=None, random_state=random_state), + ParamsPredict(include_last_label=True, agg_scores="mean"), ), "top_k_split": ( 
- Params( - method="top_k", - cv="split", - test_size=0.5, - random_state=random_state - ), - ParamsPredict( - include_last_label=True, - agg_scores="mean" - ) + Params(method="top_k", cv="split", test_size=0.5, random_state=random_state), + ParamsPredict(include_last_label=True, agg_scores="mean"), ), "raps": ( - Params( - method="raps", - cv="prefit", - test_size=None, - random_state=random_state - ), - ParamsPredict( - include_last_label=True, - agg_scores="mean" - ) + Params(method="raps", cv="prefit", test_size=None, random_state=random_state), + ParamsPredict(include_last_label=True, agg_scores="mean"), ), "raps_split": ( - Params( - method="raps", - cv="split", - test_size=0.5, - random_state=random_state - ), - ParamsPredict( - include_last_label=True, - agg_scores="mean" - ) + Params(method="raps", cv="split", test_size=0.5, random_state=random_state), + ParamsPredict(include_last_label=True, agg_scores="mean"), ), "raps_randomized": ( - Params( - method="raps", - cv="prefit", - test_size=None, - random_state=random_state - ), - ParamsPredict( - include_last_label="randomized", - agg_scores="mean" - ) + Params(method="raps", cv="prefit", test_size=None, random_state=random_state), + ParamsPredict(include_last_label="randomized", agg_scores="mean"), ), "raps_randomized_split": ( - Params( - method="raps", - cv="split", - test_size=0.5, - random_state=random_state - ), - ParamsPredict( - include_last_label="randomized", - agg_scores="mean" - ) + Params(method="raps", cv="split", test_size=0.5, random_state=random_state), + ParamsPredict(include_last_label="randomized", agg_scores="mean"), ), } STRATEGIES_BINARY = { "lac": ( - Params( - method="lac", - cv="prefit", - test_size=None, - random_state=42 - ), - ParamsPredict( - include_last_label=False, - agg_scores="mean" - ) + Params(method="lac", cv="prefit", test_size=None, random_state=42), + ParamsPredict(include_last_label=False, agg_scores="mean"), ), "lac_split": ( - Params( - method="lac", - cv="split", - test_size=0.5, - random_state=42 - ), - ParamsPredict( - include_last_label=False, - agg_scores="mean" - ) + Params(method="lac", cv="split", test_size=0.5, random_state=42), + ParamsPredict(include_last_label=False, agg_scores="mean"), ), "lac_cv_mean": ( - Params( - method="lac", - cv=3, - test_size=None, - random_state=42 - ), - ParamsPredict( - include_last_label=False, - agg_scores="mean" - ) + Params(method="lac", cv=3, test_size=None, random_state=42), + ParamsPredict(include_last_label=False, agg_scores="mean"), ), "lac_cv_crossval": ( - Params( - method="lac", - cv=3, - test_size=None, - random_state=42 - ), - ParamsPredict( - include_last_label=False, - agg_scores="crossval" - ) - ) + Params(method="lac", cv=3, test_size=None, random_state=42), + ParamsPredict(include_last_label=False, agg_scores="crossval"), + ), } COVERAGES = { - "lac": 6/9, - "lac_split": 8/9, + "lac": 6 / 9, + "lac_split": 8 / 9, "lac_cv_mean": 1.0, "lac_cv_crossval": 1.0, "aps_include": 1.0, - "aps_not_include": 5/9, - "aps_randomized": 6/9, - "aps_include_split": 8/9, - "aps_not_include_split": 5/9, - "aps_randomized_split": 7/9, + "aps_not_include": 5 / 9, + "aps_randomized": 6 / 9, + "aps_include_split": 8 / 9, + "aps_not_include_split": 5 / 9, + "aps_randomized_split": 7 / 9, "aps_include_cv_mean": 1.0, - "aps_not_include_cv_mean": 5/9, - "aps_randomized_cv_mean": 8/9, - "aps_include_cv_crossval": 4/9, - "aps_not_include_cv_crossval": 1/9, - "aps_randomized_cv_crossval": 7/9, - "naive": 5/9, - "naive_split": 5/9, + "aps_not_include_cv_mean": 
5 / 9, + "aps_randomized_cv_mean": 8 / 9, + "aps_include_cv_crossval": 4 / 9, + "aps_not_include_cv_crossval": 1 / 9, + "aps_randomized_cv_crossval": 7 / 9, + "naive": 5 / 9, + "naive_split": 5 / 9, "top_k": 1.0, "top_k_split": 1.0, "raps": 1.0, - "raps_split": 7/9, - "raps_randomized": 8/9, - "raps_randomized_split": 1.0 + "raps_split": 7 / 9, + "raps_randomized": 8 / 9, + "raps_randomized_split": 1.0, } COVERAGES_BINARY = { - "lac": 6/9, - "lac_split": 8/9, - "lac_cv_mean": 6/9, - "lac_cv_crossval": 6/9 + "lac": 6 / 9, + "lac_split": 8 / 9, + "lac_cv_mean": 6 / 9, + "lac_cv_crossval": 6 / 9, } X_toy = np.arange(9).reshape(-1, 1) @@ -481,7 +220,7 @@ [False, True, False], [False, True, False], [False, True, True], - [False, False, True] + [False, False, True], ], "lac_split": [ [True, True, False], @@ -503,7 +242,7 @@ [False, True, True], [False, True, True], [False, True, True], - [False, True, True] + [False, True, True], ], "lac_cv_crossval": [ [True, False, False], @@ -514,7 +253,7 @@ [False, True, False], [False, True, True], [False, True, True], - [False, True, True] + [False, True, True], ], "aps_include": [ [True, False, False], @@ -525,7 +264,7 @@ [False, True, False], [False, True, True], [False, True, True], - [False, False, True] + [False, False, True], ], "aps_not_include": [ [True, False, False], @@ -536,7 +275,7 @@ [False, True, False], [False, True, False], [False, False, True], - [False, False, True] + [False, False, True], ], "aps_randomized": [ [True, False, False], @@ -547,7 +286,7 @@ [False, True, False], [False, True, False], [False, True, True], - [False, False, True] + [False, False, True], ], "aps_include_split": [ [True, True, False], @@ -558,7 +297,7 @@ [True, True, True], [False, True, True], [False, False, True], - [False, False, True] + [False, False, True], ], "aps_not_include_split": [ [False, True, False], @@ -569,7 +308,7 @@ [False, True, True], [False, False, True], [False, False, True], - [False, False, True] + [False, False, True], ], "aps_randomized_split": [ [False, True, False], @@ -580,7 +319,7 @@ [False, True, True], [False, False, True], [False, False, True], - [False, False, True] + [False, False, True], ], "aps_include_cv_mean": [ [True, False, False], @@ -591,7 +330,7 @@ [False, True, True], [False, True, True], [False, True, True], - [False, True, True] + [False, True, True], ], "aps_not_include_cv_mean": [ [True, False, False], @@ -602,7 +341,7 @@ [False, True, False], [False, True, False], [False, False, True], - [False, False, True] + [False, False, True], ], "aps_randomized_cv_mean": [ [True, False, False], @@ -613,7 +352,7 @@ [False, True, False], [False, True, False], [False, True, True], - [False, True, True] + [False, True, True], ], "aps_include_cv_crossval": [ [False, False, False], @@ -624,7 +363,7 @@ [False, True, False], [False, True, False], [False, True, False], - [False, False, False] + [False, False, False], ], "aps_not_include_cv_crossval": [ [False, False, False], @@ -635,7 +374,7 @@ [False, False, False], [False, False, False], [False, False, False], - [False, False, False] + [False, False, False], ], "aps_randomized_cv_crossval": [ [True, False, False], @@ -646,7 +385,7 @@ [False, True, True], [False, True, True], [False, True, False], - [False, False, True] + [False, False, True], ], "naive": [ [True, False, False], @@ -657,7 +396,7 @@ [False, True, False], [False, True, False], [False, False, True], - [False, False, True] + [False, False, True], ], "naive_split": [ [False, True, False], @@ -668,7 +407,7 @@ [False, True, 
True], [False, False, True], [False, False, True], - [False, False, True] + [False, False, True], ], "top_k": [ [True, True, False], @@ -679,7 +418,7 @@ [False, True, True], [False, True, True], [False, True, True], - [False, True, True] + [False, True, True], ], "top_k_split": [ [True, True, False], @@ -690,7 +429,7 @@ [False, True, True], [False, True, True], [False, True, True], - [False, True, True] + [False, True, True], ], "raps": [ [True, False, False], @@ -701,7 +440,7 @@ [False, True, True], [False, True, True], [False, True, True], - [False, True, True] + [False, True, True], ], "raps_split": [ [True, True, False], @@ -712,7 +451,7 @@ [True, True, False], [True, True, False], [True, True, False], - [True, True, False] + [True, True, False], ], "raps_randomized": [ [True, False, False], @@ -723,7 +462,7 @@ [False, True, False], [False, True, False], [False, True, True], - [False, False, True] + [False, False, True], ], "raps_randomized_split": [ [True, True, True], @@ -734,8 +473,8 @@ [True, True, True], [True, True, True], [True, True, True], - [True, True, True] - ] + [True, True, True], + ], } X_toy_binary = np.arange(9).reshape(-1, 1) @@ -751,7 +490,7 @@ [False, True], [False, True], [False, True], - [False, True] + [False, True], ], "lac_split": [ [True, True], @@ -762,7 +501,7 @@ [True, True], [True, True], [True, True], - [True, False] + [True, False], ], "lac_cv_mean": [ [True, False], @@ -773,7 +512,7 @@ [False, True], [False, True], [False, True], - [False, True] + [False, True], ], "lac_cv_crossval": [ [True, False], @@ -784,15 +523,11 @@ [False, True], [False, True], [False, True], - [False, True] - ] + [False, True], + ], } -REGULARIZATION_PARAMETERS = [ - [.001, [1]], - [[.01, .2], [1, 3]], - [.1, [2, 4]] -] +REGULARIZATION_PARAMETERS = [[0.001, [1]], [[0.01, 0.2], [1, 3]], [0.1, [2, 4]]] IMAGE_INPUT = [ { @@ -806,7 +541,7 @@ { "X_calib": np.zeros((3, 256, 512)), "X_test": np.ones((3, 256, 512)), - } + }, ] X_good_image = np.zeros((3, 1024, 1024, 3)) @@ -823,7 +558,6 @@ class CumulatedScoreClassifier: - def __init__(self) -> None: self.X_calib = np.array([0, 1, 2]).reshape(-1, 1) self.y_calib = np.array([0, 1, 2]) @@ -832,12 +566,7 @@ def __init__(self) -> None: ) self.X_test = np.array([3, 4, 5]).reshape(-1, 1) self.y_pred_sets = np.array( - [ - [True, True, False], - [False, True, False], - [False, True, True], - [True, True, False] - ] + [[True, True, False], [False, True, False], [False, True, True], [True, True, False]] ) self.classes_ = self.y_calib @@ -850,17 +579,12 @@ def predict(self, X: ArrayLike) -> NDArray: def predict_proba(self, X: ArrayLike) -> NDArray: if np.max(X) <= 2: - return np.array( - [[0.4, 0.5, 0.1], [0.2, 0.6, 0.2], [0.6, 0.3, 0.1]] - ) + return np.array([[0.4, 0.5, 0.1], [0.2, 0.6, 0.2], [0.6, 0.3, 0.1]]) else: - return np.array( - [[0.2, 0.7, 0.1], [0., 1., 0.], [0., .7, 0.3], [0.3, .7, 0.]] - ) + return np.array([[0.2, 0.7, 0.1], [0.0, 1.0, 0.0], [0.0, 0.7, 0.3], [0.3, 0.7, 0.0]]) class ImageClassifier: - def __init__(self, X_calib: ArrayLike, X_test: ArrayLike) -> None: self.X_calib = X_calib self.y_calib = np.array([0, 1, 2]) @@ -882,17 +606,12 @@ def predict(self, *args: Any) -> NDArray: def predict_proba(self, X: ArrayLike) -> NDArray: if np.max(X) == 0: - return np.array( - [[0.4, 0.5, 0.1], [0.2, 0.6, 0.2], [0.6, 0.3, 0.1]] - ) + return np.array([[0.4, 0.5, 0.1], [0.2, 0.6, 0.2], [0.6, 0.3, 0.1]]) else: - return np.array( - [[0.2, 0.7, 0.1], [0.1, 0.2, 0.7], [0.3, 0.5, 0.2]] - ) + return np.array([[0.2, 0.7, 0.1], [0.1, 0.2, 0.7], 
[0.3, 0.5, 0.2]]) class WrongOutputModel: - def __init__(self, proba_out: NDArray): self.trained_ = True self.proba_out = proba_out @@ -905,14 +624,11 @@ def predict_proba(self, *args: Any) -> NDArray: return self.proba_out def predict(self, *args: Any) -> NDArray: - pred = ( - self.proba_out == self.proba_out.max(axis=1)[:, None] - ).astype(int) + pred = (self.proba_out == self.proba_out.max(axis=1)[:, None]).astype(int) return pred class Float32OuputModel: - def __init__(self, prefit: bool = True): self.trained_ = prefit self.classes_ = [0, 1, 2] @@ -922,7 +638,7 @@ def fit(self, *args: Any) -> None: self.trained_ = True def predict_proba(self, X: NDArray, *args: Any) -> NDArray: - probas = np.array([[.9, .05, .05]]) + probas = np.array([[0.9, 0.05, 0.05]]) proba_out = np.repeat(probas, len(X), axis=0).astype(np.float32) return proba_out @@ -958,9 +674,7 @@ def test_default_parameters() -> None: @pytest.mark.parametrize("method", ["aps", "raps"]) def test_warning_binary_classif(cv: str, method: str) -> None: """Test that a warning is raised y is binary.""" - mapie_clf = MapieClassifier( - cv=cv, method=method, random_state=random_state - ) + mapie_clf = MapieClassifier(cv=cv, method=method, random_state=random_state) X, y = make_classification( n_samples=500, n_features=10, @@ -968,9 +682,7 @@ def test_warning_binary_classif(cv: str, method: str) -> None: n_classes=2, random_state=random_state, ) - with pytest.raises( - ValueError, match=r".*Invalid method for binary target.*" - ): + with pytest.raises(ValueError, match=r".*Invalid method for binary target.*"): mapie_clf.fit(X, y) @@ -1002,24 +714,28 @@ def test_valid_estimator(strategy: str) -> None: @pytest.mark.parametrize("method", METHODS) def test_valid_method(method: str) -> None: """Test that valid methods raise no errors.""" - mapie_clf = MapieClassifier( - method=method, cv="prefit", random_state=random_state - ) + mapie_clf = MapieClassifier(method=method, cv="prefit", random_state=random_state) mapie_clf.fit(X_toy, y_toy) check_is_fitted(mapie_clf, mapie_clf.fit_attributes) @pytest.mark.parametrize( - "cv", [None, -1, 2, KFold(), LeaveOneOut(), "prefit", - ShuffleSplit(n_splits=1, test_size=0.5, random_state=random_state)] + "cv", + [ + None, + -1, + 2, + KFold(), + LeaveOneOut(), + "prefit", + ShuffleSplit(n_splits=1, test_size=0.5, random_state=random_state), + ], ) def test_valid_cv(cv: Any) -> None: """Test that valid cv raises no errors.""" model = LogisticRegression(multi_class="multinomial") model.fit(X_toy, y_toy) - mapie_clf = MapieClassifier( - estimator=model, cv=cv, random_state=random_state - ) + mapie_clf = MapieClassifier(estimator=model, cv=cv, random_state=random_state) mapie_clf.fit(X_toy, y_toy) mapie_clf.predict(X_toy, alpha=0.5) @@ -1027,9 +743,7 @@ def test_valid_cv(cv: Any) -> None: @pytest.mark.parametrize("agg_scores", ["mean", "crossval"]) def test_agg_scores_argument(agg_scores: str) -> None: """Test that predict passes with all valid 'agg_scores' arguments.""" - mapie_clf = MapieClassifier( - cv=3, method="lac", random_state=random_state - ) + mapie_clf = MapieClassifier(cv=3, method="lac", random_state=random_state) mapie_clf.fit(X_toy, y_toy) mapie_clf.predict(X_toy, alpha=0.5, agg_scores=agg_scores) @@ -1037,13 +751,9 @@ def test_agg_scores_argument(agg_scores: str) -> None: @pytest.mark.parametrize("agg_scores", ["median", 1, None]) def test_invalid_agg_scores_argument(agg_scores: str) -> None: """Test that invalid 'agg_scores' raise errors.""" - mapie_clf = MapieClassifier( - cv=3, 
method="lac", random_state=random_state - ) + mapie_clf = MapieClassifier(cv=3, method="lac", random_state=random_state) mapie_clf.fit(X_toy, y_toy) - with pytest.raises( - ValueError, match=r".*Invalid 'agg_scores' argument.*" - ): + with pytest.raises(ValueError, match=r".*Invalid 'agg_scores' argument.*"): mapie_clf.predict(X_toy, alpha=0.5, agg_scores=agg_scores) @@ -1059,27 +769,21 @@ def test_too_large_cv(cv: Any) -> None: @pytest.mark.parametrize( - "include_last_label", - [-3.14, 1.5, -2, 0, 1, "cv", DummyClassifier(), [1, 2]] + "include_last_label", [-3.14, 1.5, -2, 0, 1, "cv", DummyClassifier(), [1, 2]] ) def test_invalid_include_last_label(include_last_label: Any) -> None: """Test that invalid include_last_label raise errors.""" mapie_clf = MapieClassifier(random_state=random_state) mapie_clf.fit(X_toy, y_toy) - with pytest.raises( - ValueError, match=r".*Invalid include_last_label argument.*" - ): - mapie_clf.predict( - X_toy, - y_toy, - include_last_label=include_last_label - ) + with pytest.raises(ValueError, match=r".*Invalid include_last_label argument.*"): + mapie_clf.predict(X_toy, y_toy, include_last_label=include_last_label) @pytest.mark.parametrize("strategy", [*STRATEGIES]) @pytest.mark.parametrize("alpha", [0.2, [0.2, 0.3], (0.2, 0.3)]) def test_predict_output_shape( - strategy: str, alpha: Any, + strategy: str, + alpha: Any, ) -> None: """Test predict output shape.""" args_init, args_predict = STRATEGIES[strategy] @@ -1089,7 +793,7 @@ def test_predict_output_shape( X, alpha=alpha, include_last_label=args_predict["include_last_label"], - agg_scores=args_predict["agg_scores"] + agg_scores=args_predict["agg_scores"], ) n_alpha = len(alpha) if hasattr(alpha, "__len__") else 1 assert y_pred.shape == (X.shape[0],) @@ -1099,26 +803,25 @@ def test_predict_output_shape( @pytest.mark.parametrize("strategy", [*STRATEGIES]) @pytest.mark.parametrize("alpha", [0.2, [0.2, 0.3], (0.2, 0.3)]) def test_y_is_list_of_string( - strategy: str, alpha: Any, + strategy: str, + alpha: Any, ) -> None: """Test predict output shape with string y.""" args_init, args_predict = STRATEGIES[strategy] mapie_clf = MapieClassifier(**args_init) - mapie_clf.fit(X, y.astype('str')) + mapie_clf.fit(X, y.astype("str")) y_pred, y_ps = mapie_clf.predict( X, alpha=alpha, include_last_label=args_predict["include_last_label"], - agg_scores=args_predict["agg_scores"] + agg_scores=args_predict["agg_scores"], ) n_alpha = len(alpha) if hasattr(alpha, "__len__") else 1 assert y_pred.shape == (X.shape[0],) assert y_ps.shape == (X.shape[0], len(np.unique(y)), n_alpha) -@pytest.mark.parametrize( - "strategy", ["naive", "top_k", "lac", "aps_include"] -) +@pytest.mark.parametrize("strategy", ["naive", "top_k", "lac", "aps_include"]) def test_same_results_prefit_split(strategy: str) -> None: """ Test checking that if split and prefit method have exactly @@ -1136,7 +839,7 @@ def test_same_results_prefit_split(strategy: str) -> None: X_train_, X_calib_ = X[train_index], X[val_index] y_train_, y_calib_ = y[train_index], y[val_index] - args_init, args_predict = deepcopy(STRATEGIES[strategy + '_split']) + args_init, args_predict = deepcopy(STRATEGIES[strategy + "_split"]) args_init["cv"] = cv mapie_reg = MapieClassifier(**args_init) mapie_reg.fit(X, y) @@ -1156,13 +859,14 @@ def test_same_results_prefit_split(strategy: str) -> None: @pytest.mark.parametrize("strategy", [*STRATEGIES]) @pytest.mark.parametrize("alpha", [0.2, [0.2, 0.3], (0.2, 0.3)]) def test_same_result_y_numeric_and_string( - strategy: str, alpha: Any, + 
strategy: str, + alpha: Any, ) -> None: """Test that MAPIE outputs the same results if y is numeric or string""" args_init, args_predict = STRATEGIES[strategy] mapie_clf_str = MapieClassifier(**args_init) - mapie_clf_str.fit(X, y.astype('str')) + mapie_clf_str.fit(X, y.astype("str")) mapie_clf_int = MapieClassifier(**args_init) mapie_clf_int.fit(X, y) _, y_ps_str = mapie_clf_str.predict( @@ -1175,7 +879,7 @@ def test_same_result_y_numeric_and_string( X, alpha=alpha, include_last_label=args_predict["include_last_label"], - agg_scores=args_predict["agg_scores"] + agg_scores=args_predict["agg_scores"], ) np.testing.assert_allclose(y_ps_int, y_ps_str) @@ -1183,7 +887,8 @@ def test_same_result_y_numeric_and_string( @pytest.mark.parametrize("strategy", [*STRATEGIES]) @pytest.mark.parametrize("alpha", [0.2, [0.2, 0.3], (0.2, 0.3)]) def test_y_1_to_l_minus_1( - strategy: str, alpha: Any, + strategy: str, + alpha: Any, ) -> None: """Test predict output shape with string y.""" args_init, args_predict = STRATEGIES[strategy] @@ -1193,7 +898,7 @@ def test_y_1_to_l_minus_1( X, alpha=alpha, include_last_label=args_predict["include_last_label"], - agg_scores=args_predict["agg_scores"] + agg_scores=args_predict["agg_scores"], ) n_alpha = len(alpha) if hasattr(alpha, "__len__") else 1 assert y_pred.shape == (X.shape[0],) @@ -1203,7 +908,8 @@ def test_y_1_to_l_minus_1( @pytest.mark.parametrize("strategy", [*STRATEGIES]) @pytest.mark.parametrize("alpha", [0.2, [0.2, 0.3], (0.2, 0.3)]) def test_same_result_y_numeric_and_1_to_l_minus_1( - strategy: str, alpha: Any, + strategy: str, + alpha: Any, ) -> None: """Test that MAPIE outputs the same results if y is numeric or string""" @@ -1222,7 +928,7 @@ def test_same_result_y_numeric_and_1_to_l_minus_1( X, alpha=alpha, include_last_label=args_predict["include_last_label"], - agg_scores=args_predict["agg_scores"] + agg_scores=args_predict["agg_scores"], ) np.testing.assert_allclose(y_ps_int, y_ps_1) @@ -1240,19 +946,15 @@ def test_results_for_same_alpha(strategy: str) -> None: X, alpha=[0.1, 0.1], include_last_label=args_predict["include_last_label"], - agg_scores=args_predict["agg_scores"] + agg_scores=args_predict["agg_scores"], ) np.testing.assert_allclose(y_ps[:, 0, 0], y_ps[:, 0, 1]) np.testing.assert_allclose(y_ps[:, 1, 0], y_ps[:, 1, 1]) @pytest.mark.parametrize("strategy", [*STRATEGIES]) -@pytest.mark.parametrize( - "alpha", [np.array([0.05, 0.1]), [0.05, 0.1], (0.05, 0.1)] -) -def test_results_for_alpha_as_float_and_arraylike( - strategy: str, alpha: Any -) -> None: +@pytest.mark.parametrize("alpha", [np.array([0.05, 0.1]), [0.05, 0.1], (0.05, 0.1)]) +def test_results_for_alpha_as_float_and_arraylike(strategy: str, alpha: Any) -> None: """Test that output values do not depend on type of alpha.""" args_init, args_predict = STRATEGIES[strategy] mapie_clf = MapieClassifier(**args_init) @@ -1261,19 +963,19 @@ def test_results_for_alpha_as_float_and_arraylike( X, alpha=alpha[0], include_last_label=args_predict["include_last_label"], - agg_scores=args_predict["agg_scores"] + agg_scores=args_predict["agg_scores"], ) y_pred_float2, y_ps_float2 = mapie_clf.predict( X, alpha=alpha[1], include_last_label=args_predict["include_last_label"], - agg_scores=args_predict["agg_scores"] + agg_scores=args_predict["agg_scores"], ) y_pred_array, y_ps_array = mapie_clf.predict( X, alpha=alpha, include_last_label=args_predict["include_last_label"], - agg_scores=args_predict["agg_scores"] + agg_scores=args_predict["agg_scores"], ) np.testing.assert_allclose(y_pred_float1, 
y_pred_array) np.testing.assert_allclose(y_pred_float2, y_pred_array) @@ -1296,22 +998,20 @@ def test_results_single_and_multi_jobs(strategy: str) -> None: X, alpha=0.2, include_last_label=args_predict["include_last_label"], - agg_scores=args_predict["agg_scores"] + agg_scores=args_predict["agg_scores"], ) y_pred_multi, y_ps_multi = mapie_clf_multi.predict( X, alpha=0.2, include_last_label=args_predict["include_last_label"], - agg_scores=args_predict["agg_scores"] + agg_scores=args_predict["agg_scores"], ) np.testing.assert_allclose(y_pred_single, y_pred_multi) np.testing.assert_allclose(y_ps_single, y_ps_multi) @pytest.mark.parametrize("strategy", [*STRATEGIES]) -def test_results_with_constant_sample_weights( - strategy: str -) -> None: +def test_results_with_constant_sample_weights(strategy: str) -> None: """ Test predictions when sample weights are None or constant with different values. @@ -1330,19 +1030,19 @@ def test_results_with_constant_sample_weights( X, alpha=0.2, include_last_label=args_predict["include_last_label"], - agg_scores=args_predict["agg_scores"] + agg_scores=args_predict["agg_scores"], ) y_pred1, y_ps1 = mapie_clf1.predict( X, alpha=0.2, include_last_label=args_predict["include_last_label"], - agg_scores=args_predict["agg_scores"] + agg_scores=args_predict["agg_scores"], ) y_pred2, y_ps2 = mapie_clf2.predict( X, alpha=0.2, include_last_label=args_predict["include_last_label"], - agg_scores=args_predict["agg_scores"] + agg_scores=args_predict["agg_scores"], ) np.testing.assert_allclose(y_pred0, y_pred1) np.testing.assert_allclose(y_pred0, y_pred2) @@ -1350,16 +1050,12 @@ def test_results_with_constant_sample_weights( np.testing.assert_allclose(y_ps0, y_ps2) -@pytest.mark.parametrize( - "alpha", [[0.2, 0.8], (0.2, 0.8), np.array([0.2, 0.8]), None] -) +@pytest.mark.parametrize("alpha", [[0.2, 0.8], (0.2, 0.8), np.array([0.2, 0.8]), None]) def test_valid_prediction(alpha: Any) -> None: """Test fit and predict.""" model = LogisticRegression(multi_class="multinomial") model.fit(X_toy, y_toy) - mapie_clf = MapieClassifier( - estimator=model, cv="prefit", random_state=random_state - ) + mapie_clf = MapieClassifier(estimator=model, cv="prefit", random_state=random_state) mapie_clf.fit(X_toy, y_toy) mapie_clf.predict(X_toy, alpha=alpha) @@ -1373,12 +1069,12 @@ def test_toy_dataset_predictions(strategy: str) -> None: else: clf = LogisticRegression() mapie_clf = MapieClassifier(estimator=clf, **args_init) - mapie_clf.fit(X_toy, y_toy, size_raps=.5) + mapie_clf.fit(X_toy, y_toy, size_raps=0.5) _, y_ps = mapie_clf.predict( X_toy, alpha=0.5, include_last_label=args_predict["include_last_label"], - agg_scores=args_predict["agg_scores"] + agg_scores=args_predict["agg_scores"], ) np.testing.assert_allclose(y_ps[:, :, 0], y_toy_mapie[strategy]) np.testing.assert_allclose( @@ -1403,7 +1099,7 @@ def test_toy_binary_dataset_predictions(strategy: str) -> None: X_toy, alpha=0.5, include_last_label=args_predict["include_last_label"], - agg_scores=args_predict["agg_scores"] + agg_scores=args_predict["agg_scores"], ) np.testing.assert_allclose(y_ps[:, :, 0], y_toy_binary_mapie[strategy]) np.testing.assert_allclose( @@ -1419,22 +1115,11 @@ def test_cumulated_scores() -> None: # fit cumclf = CumulatedScoreClassifier() cumclf.fit(cumclf.X_calib, cumclf.y_calib) - mapie_clf = MapieClassifier( - cumclf, - method="aps", - cv="prefit", - random_state=random_state - ) + mapie_clf = MapieClassifier(cumclf, method="aps", cv="prefit", random_state=random_state) mapie_clf.fit(cumclf.X_calib, 
cumclf.y_calib) - np.testing.assert_allclose( - mapie_clf.conformity_scores_, cumclf.y_calib_scores - ) + np.testing.assert_allclose(mapie_clf.conformity_scores_, cumclf.y_calib_scores) # predict - _, y_ps = mapie_clf.predict( - cumclf.X_test, - include_last_label=True, - alpha=alpha - ) + _, y_ps = mapie_clf.predict(cumclf.X_test, include_last_label=True, alpha=alpha) np.testing.assert_allclose(mapie_clf.quantiles_, quantile) np.testing.assert_allclose(y_ps[:, :, 0], cumclf.y_pred_sets) @@ -1449,20 +1134,11 @@ def test_image_cumulated_scores(X: Dict[str, ArrayLike]) -> None: X_test = X["X_test"] cumclf = ImageClassifier(X_calib, X_test) cumclf.fit(cumclf.X_calib, cumclf.y_calib) - mapie = MapieClassifier( - cumclf, - method="aps", - cv="prefit", - random_state=random_state - ) + mapie = MapieClassifier(cumclf, method="aps", cv="prefit", random_state=random_state) mapie.fit(cumclf.X_calib, cumclf.y_calib) np.testing.assert_allclose(mapie.conformity_scores_, cumclf.y_calib_scores) # predict - _, y_ps = mapie.predict( - cumclf.X_test, - include_last_label=True, - alpha=alpha - ) + _, y_ps = mapie.predict(cumclf.X_test, include_last_label=True, alpha=alpha) np.testing.assert_allclose(mapie.quantiles_, quantile) np.testing.assert_allclose(y_ps[:, :, 0], cumclf.y_pred_sets) @@ -1475,17 +1151,14 @@ def test_sum_proba_to_one_fit(y_pred_proba: NDArray) -> None: """ wrong_model = WrongOutputModel(y_pred_proba) mapie_clf = MapieClassifier(wrong_model, cv="prefit") - with pytest.raises( - AssertionError, match=r".*The sum of the scores is not equal to one.*" - ): + with pytest.raises(AssertionError, match=r".*The sum of the scores is not equal to one.*"): mapie_clf.fit(X_toy, y_toy) @pytest.mark.parametrize("y_pred_proba", Y_PRED_PROBA_WRONG) @pytest.mark.parametrize("alpha", [0.2, [0.2, 0.3], (0.2, 0.3)]) def test_sum_proba_to_one_predict( - y_pred_proba: NDArray, - alpha: Union[float, Iterable[float]] + y_pred_proba: NDArray, alpha: Union[float, Iterable[float]] ) -> None: """ Test if when the output probabilities of the model do not @@ -1495,18 +1168,12 @@ def test_sum_proba_to_one_predict( mapie_clf = MapieClassifier(cv="prefit", random_state=random_state) mapie_clf.fit(X_toy, y_toy) mapie_clf.single_estimator_ = wrong_model - with pytest.raises( - AssertionError, match=r".*The sum of the scores is not equal to one.*" - ): + with pytest.raises(AssertionError, match=r".*The sum of the scores is not equal to one.*"): mapie_clf.predict(X_toy, alpha=alpha) -@pytest.mark.parametrize( - "estimator", [LogisticRegression(), make_pipeline(LogisticRegression())] -) -def test_classifier_without_classes_attribute( - estimator: ClassifierMixin -) -> None: +@pytest.mark.parametrize("estimator", [LogisticRegression(), make_pipeline(LogisticRegression())]) +def test_classifier_without_classes_attribute(estimator: ClassifierMixin) -> None: """ Test that prefitted classifier without 'classes_ 'attribute raises error. 
""" @@ -1515,24 +1182,16 @@ def test_classifier_without_classes_attribute( delattr(estimator[-1], "classes_") else: delattr(estimator, "classes_") - mapie = MapieClassifier( - estimator=estimator, cv="prefit", random_state=random_state - ) - with pytest.raises( - AttributeError, match=r".*does not contain 'classes_'.*" - ): + mapie = MapieClassifier(estimator=estimator, cv="prefit", random_state=random_state) + with pytest.raises(AttributeError, match=r".*does not contain 'classes_'.*"): mapie.fit(X_toy, y_toy) @pytest.mark.parametrize("method", WRONG_METHODS) def test_method_error_in_fit(monkeypatch: Any, method: str) -> None: """Test else condition for the method in .fit""" - monkeypatch.setattr( - MapieClassifier, "_check_parameters", do_nothing - ) - mapie_clf = MapieClassifier( - method=method, random_state=random_state - ) + monkeypatch.setattr(MapieClassifier, "_check_parameters", do_nothing) + mapie_clf = MapieClassifier(method=method, random_state=random_state) with pytest.raises(ValueError, match=r".*Invalid method.*"): mapie_clf.fit(X_toy, y_toy) @@ -1541,9 +1200,7 @@ def test_method_error_in_fit(monkeypatch: Any, method: str) -> None: @pytest.mark.parametrize("alpha", [0.2, [0.2, 0.3], (0.2, 0.3)]) def test_method_error_in_predict(method: Any, alpha: float) -> None: """Test else condition for the method in .predict""" - mapie_clf = MapieClassifier( - method="lac", random_state=random_state - ) + mapie_clf = MapieClassifier(method="lac", random_state=random_state) mapie_clf.fit(X_toy, y_toy) mapie_clf.method = method with pytest.raises(ValueError, match=r".*Invalid method.*"): @@ -1556,20 +1213,11 @@ def test_include_label_error_in_predict( monkeypatch: Any, include_labels: Union[bool, str], alpha: float ) -> None: """Test else condition for include_label parameter in .predict""" - monkeypatch.setattr( - MapieClassifier, - "_check_include_last_label", - do_nothing - ) - mapie_clf = MapieClassifier( - method="aps", random_state=random_state - ) + monkeypatch.setattr(MapieClassifier, "_check_include_last_label", do_nothing) + mapie_clf = MapieClassifier(method="aps", random_state=random_state) mapie_clf.fit(X_toy, y_toy) with pytest.raises(ValueError, match=r".*Invalid include.*"): - mapie_clf.predict( - X_toy, alpha=alpha, - include_last_label=include_labels - ) + mapie_clf.predict(X_toy, alpha=alpha, include_last_label=include_labels) def test_pred_loof_isnan() -> None: @@ -1602,15 +1250,10 @@ def test_pipeline_compatibility(strategy: str) -> None: ] ) categorical_preprocessor = Pipeline( - steps=[ - ("encoding", OneHotEncoder(handle_unknown="ignore")) - ] + steps=[("encoding", OneHotEncoder(handle_unknown="ignore"))] ) preprocessor = ColumnTransformer( - [ - ("cat", categorical_preprocessor, ["x_cat"]), - ("num", numeric_preprocessor, ["x_num"]) - ] + [("cat", categorical_preprocessor, ["x_cat"]), ("num", numeric_preprocessor, ["x_num"])] ) pipe = make_pipeline(preprocessor, LogisticRegression()) pipe.fit(X, y) @@ -1640,32 +1283,21 @@ def test_classif_float32(cv) -> None: to the highest probability, MAPIE would have return empty prediction sets""" X_cal, y_cal = make_classification( - n_samples=20, - n_features=20, - n_redundant=0, - n_informative=20, - n_classes=3 + n_samples=20, n_features=20, n_redundant=0, n_informative=20, n_classes=3 ) X_test, _ = make_classification( - n_samples=20, - n_features=20, - n_redundant=0, - n_informative=20, - n_classes=3 + n_samples=20, n_features=20, n_redundant=0, n_informative=20, n_classes=3 ) - alpha = .9 + alpha = 0.9 dummy_classif = 
Float32OuputModel() mapie = MapieClassifier( - estimator=dummy_classif, method="naive", - cv=cv, random_state=random_state + estimator=dummy_classif, method="naive", cv=cv, random_state=random_state ) mapie.fit(X_cal, y_cal) _, yps = mapie.predict(X_test, alpha=alpha, include_last_label=True) - assert ( - np.repeat([[True, False, False]], 20, axis=0)[:, :, np.newaxis] == yps - ).all() + assert (np.repeat([[True, False, False]], 20, axis=0)[:, :, np.newaxis] == yps).all() @pytest.mark.parametrize("k_lambda", REGULARIZATION_PARAMETERS) @@ -1679,9 +1311,7 @@ def test_regularize_conf_scores_shape(k_lambda) -> None: mapie_clf = MapieClassifier(estimator=clf, **args_init) conf_scores = np.random.rand(100, 1) cutoff = np.cumsum(np.ones(conf_scores.shape)) - 1 - reg_conf_scores = mapie_clf._regularize_conformity_score( - k, lambda_, conf_scores, cutoff - ) + reg_conf_scores = mapie_clf._regularize_conformity_score(k, lambda_, conf_scores, cutoff) assert reg_conf_scores.shape == (100, 1, len(k)) @@ -1694,15 +1324,11 @@ def test_get_true_label_cumsum_proba_shape() -> None: clf = LogisticRegression() clf.fit(X, y) y_pred = clf.predict_proba(X) - mapie_clf = MapieClassifier( - estimator=clf, random_state=random_state - ) + mapie_clf = MapieClassifier(estimator=clf, random_state=random_state) mapie_clf.fit(X, y) - cumsum_proba, cutoff = mapie_clf._get_true_label_cumsum_proba( - y, y_pred - ) + cumsum_proba, cutoff = mapie_clf._get_true_label_cumsum_proba(y, y_pred) assert cumsum_proba.shape == (len(X), 1) - assert cutoff.shape == (len(X), ) + assert cutoff.shape == (len(X),) def test_get_true_label_cumsum_proba_result() -> None: @@ -1713,26 +1339,24 @@ def test_get_true_label_cumsum_proba_result() -> None: clf = LogisticRegression() clf.fit(X_toy, y_toy) y_pred = clf.predict_proba(X_toy) - mapie_clf = MapieClassifier( - estimator=clf, random_state=random_state - ) + mapie_clf = MapieClassifier(estimator=clf, random_state=random_state) mapie_clf.fit(X_toy, y_toy) - cumsum_proba, cutoff = mapie_clf._get_true_label_cumsum_proba( - y_toy, y_pred - ) + cumsum_proba, cutoff = mapie_clf._get_true_label_cumsum_proba(y_toy, y_pred) np.testing.assert_allclose( cumsum_proba, np.array( [ - y_pred[0, 0], y_pred[1, 0], + y_pred[0, 0], + y_pred[1, 0], y_pred[2, 0] + y_pred[2, 1], y_pred[3, 0] + y_pred[3, 1], - y_pred[4, 1], y_pred[5, 1], + y_pred[4, 1], + y_pred[5, 1], y_pred[6, 1] + y_pred[6, 2], y_pred[7, 1] + y_pred[7, 2], - y_pred[8, 2] + y_pred[8, 2], ] - )[:, np.newaxis] + )[:, np.newaxis], ) np.testing.assert_allclose(cutoff, np.array([1, 1, 2, 2, 1, 1, 2, 2, 1])) @@ -1746,22 +1370,19 @@ def test_get_last_included_proba_shape(k_lambda, strategy): """ lambda_, k = k_lambda[0], k_lambda[1] if len(k) == 1: - thresholds = .2 + thresholds = 0.2 else: thresholds = np.random.rand(len(k)) thresholds = cast(NDArray, check_alpha(thresholds)) clf = LogisticRegression() clf.fit(X, y) y_pred_proba = clf.predict_proba(X) - y_pred_proba = np.repeat( - y_pred_proba[:, :, np.newaxis], len(thresholds), axis=2 - ) + y_pred_proba = np.repeat(y_pred_proba[:, :, np.newaxis], len(thresholds), axis=2) mapie = MapieClassifier(estimator=clf, **STRATEGIES[strategy][0]) include_last_label = STRATEGIES[strategy][1]["include_last_label"] y_p_p_c, y_p_i_l, y_p_p_i_l = mapie._get_last_included_proba( - y_pred_proba, thresholds, - include_last_label, lambda_, k + y_pred_proba, thresholds, include_last_label, lambda_, k ) assert y_p_p_c.shape == (len(X), len(np.unique(y)), len(thresholds)) @@ -1770,9 +1391,7 @@ def 
test_get_last_included_proba_shape(k_lambda, strategy): @pytest.mark.parametrize("y_true_proba_place", Y_TRUE_PROBA_PLACE) -def test_get_true_label_position( - y_true_proba_place: List[NDArray] -) -> None: +def test_get_true_label_position(y_true_proba_place: List[NDArray]) -> None: """ Check that the returned true label position the good. """ @@ -1792,9 +1411,7 @@ def test_error_raps_cv_not_prefit(cv: Union[int, None]) -> None: Test that an error is raised if the method is RAPS and cv is different from prefit and split. """ - mapie = MapieClassifier( - method="raps", cv=cv, random_state=random_state - ) + mapie = MapieClassifier(method="raps", cv=cv, random_state=random_state) with pytest.raises(ValueError, match=r".*RAPS method can only.*"): mapie.fit(X_toy, y_toy) @@ -1810,12 +1427,11 @@ def test_not_all_label_in_calib() -> None: X_mapie = X[indices_remove] y_mapie = y[indices_remove] mapie_clf = MapieClassifier( - estimator=clf, method="aps", - cv="prefit", random_state=random_state + estimator=clf, method="aps", cv="prefit", random_state=random_state ) mapie_clf.fit(X_mapie, y_mapie) y_pred, y_pss = mapie_clf.predict(X, alpha=0.5) - assert y_pred.shape == (len(X), ) + assert y_pred.shape == (len(X),) assert y_pss.shape == (len(X), len(np.unique(y)), 1) @@ -1829,12 +1445,9 @@ def test_warning_not_all_label_in_calib() -> None: X_mapie = X[indices_remove] y_mapie = y[indices_remove] mapie_clf = MapieClassifier( - estimator=clf, method="aps", - cv="prefit", random_state=random_state + estimator=clf, method="aps", cv="prefit", random_state=random_state ) - with pytest.warns( - UserWarning, match=r".*WARNING: your calibration dataset.*" - ): + with pytest.warns(UserWarning, match=r".*WARNING: your calibration dataset.*"): mapie_clf.fit(X_mapie, y_mapie) @@ -1849,8 +1462,7 @@ def test_n_classes_prefit() -> None: X_mapie = X[indices_remove] y_mapie = y[indices_remove] mapie_clf = MapieClassifier( - estimator=clf, method="aps", - cv="prefit", random_state=random_state + estimator=clf, method="aps", cv="prefit", random_state=random_state ) mapie_clf.fit(X_mapie, y_mapie) assert mapie_clf.n_classes_ == len(np.unique(y)) @@ -1867,8 +1479,7 @@ def test_classes_prefit() -> None: X_mapie = X[indices_remove] y_mapie = y[indices_remove] mapie_clf = MapieClassifier( - estimator=clf, method="aps", - cv="prefit", random_state=random_state + estimator=clf, method="aps", cv="prefit", random_state=random_state ) mapie_clf.fit(X_mapie, y_mapie) assert (mapie_clf.classes_ == np.unique(y)).all() @@ -1884,10 +1495,7 @@ def test_classes_encoder_same_than_model() -> None: indices_remove = np.where(y != 2) X_mapie = X[indices_remove] y_mapie = y[indices_remove] - mapie_clf = MapieClassifier( - estimator=clf, method="aps", - cv="prefit" - ) + mapie_clf = MapieClassifier(estimator=clf, method="aps", cv="prefit") mapie_clf.fit(X_mapie, y_mapie) assert (mapie_clf.label_encoder_.classes_ == np.unique(y)).all() @@ -1899,10 +1507,7 @@ def test_n_classes_cv() -> None: """ clf = LogisticRegression() - mapie_clf = MapieClassifier( - estimator=clf, method="aps", - cv=5, random_state=random_state - ) + mapie_clf = MapieClassifier(estimator=clf, method="aps", cv=5, random_state=random_state) mapie_clf.fit(X, y) assert mapie_clf.n_classes_ == len(np.unique(y)) @@ -1914,10 +1519,7 @@ def test_classes_cv() -> None: """ clf = LogisticRegression() - mapie_clf = MapieClassifier( - estimator=clf, method="aps", - cv=5, random_state=random_state - ) + mapie_clf = MapieClassifier(estimator=clf, method="aps", cv=5, 
random_state=random_state) mapie_clf.fit(X, y) assert (mapie_clf.classes_ == np.unique(y)).all() @@ -1931,12 +1533,9 @@ def test_raise_error_new_class() -> None: clf.fit(X, y) y[-1] = 10 mapie_clf = MapieClassifier( - estimator=clf, method="aps", - cv="prefit", random_state=random_state + estimator=clf, method="aps", cv="prefit", random_state=random_state ) - with pytest.raises( - ValueError, match=r".*Values in y do not matched values.*" - ): + with pytest.raises(ValueError, match=r".*Values in y do not matched values.*"): mapie_clf.fit(X, y) @@ -1948,10 +1547,7 @@ def test_deprecated_method_warning(method: str) -> None: clf = LogisticRegression() clf.fit(X_toy, y_toy) mapie_clf = MapieClassifier( - estimator=clf, method=method, - cv="prefit", random_state=random_state + estimator=clf, method=method, cv="prefit", random_state=random_state ) - with pytest.warns( - DeprecationWarning, match=r".*WARNING: Deprecated method.*" - ): + with pytest.warns(DeprecationWarning, match=r".*WARNING: Deprecated method.*"): mapie_clf.fit(X_toy, y_toy) diff --git a/mapie/tests/test_common.py b/mapie/tests/test_common.py index 45379bc2..37a496cf 100644 --- a/mapie/tests/test_common.py +++ b/mapie/tests/test_common.py @@ -15,9 +15,7 @@ from mapie.regression import MapieQuantileRegressor, MapieRegressor X_toy = np.arange(18).reshape(-1, 1) -y_toy = np.array( - [0, 0, 1, 0, 1, 2, 1, 2, 2, 0, 0, 1, 0, 1, 2, 1, 2, 2] - ) +y_toy = np.array([0, 0, 1, 0, 1, 2, 1, 2, 2, 0, 0, 1, 0, 1, 2, 1, 2, 2]) def MapieSimpleEstimators() -> List[BaseEstimator]: @@ -87,19 +85,14 @@ def test_no_fit_predict(MapieEstimator: BaseEstimator) -> None: def test_default_sample_weight(MapieEstimator: BaseEstimator) -> None: """Test default sample weights.""" mapie_estimator = MapieEstimator() - assert ( - signature(mapie_estimator.fit).parameters["sample_weight"].default - is None - ) + assert signature(mapie_estimator.fit).parameters["sample_weight"].default is None @pytest.mark.parametrize("MapieEstimator", MapieSimpleEstimators()) def test_default_alpha(MapieEstimator: BaseEstimator) -> None: """Test default alpha.""" mapie_estimator = MapieEstimator() - assert ( - signature(mapie_estimator.predict).parameters["alpha"].default is None - ) + assert signature(mapie_estimator.predict).parameters["alpha"].default is None @pytest.mark.parametrize("pack", MapieDefaultEstimators()) @@ -111,16 +104,12 @@ def test_none_estimator(pack: Tuple[BaseEstimator, BaseEstimator]) -> None: if isinstance(mapie_estimator, MapieClassifier): assert isinstance(mapie_estimator.single_estimator_, DefaultEstimator) if isinstance(mapie_estimator, MapieRegressor): - assert isinstance( - mapie_estimator.estimator_.single_estimator_, DefaultEstimator - ) + assert isinstance(mapie_estimator.estimator_.single_estimator_, DefaultEstimator) @pytest.mark.parametrize("estimator", [0, "a", KFold(), ["a", "b"]]) @pytest.mark.parametrize("MapieEstimator", MapieSimpleEstimators()) -def test_invalid_estimator( - MapieEstimator: BaseEstimator, estimator: Any -) -> None: +def test_invalid_estimator(MapieEstimator: BaseEstimator, estimator: Any) -> None: """Test that invalid estimators raise errors.""" mapie_estimator = MapieEstimator(estimator=estimator) with pytest.raises(ValueError, match=r".*Invalid estimator.*"): @@ -128,9 +117,7 @@ def test_invalid_estimator( @pytest.mark.parametrize("pack", MapieTestEstimators()) -def test_invalid_prefit_estimator( - pack: Tuple[BaseEstimator, BaseEstimator] -) -> None: +def test_invalid_prefit_estimator(pack: Tuple[BaseEstimator, 
BaseEstimator]) -> None: """Test that non-fitted estimator with prefit cv raise errors.""" MapieEstimator, estimator = pack mapie_estimator = MapieEstimator(estimator=estimator, cv="prefit") @@ -139,9 +126,7 @@ def test_invalid_prefit_estimator( @pytest.mark.parametrize("pack", MapieTestEstimators()) -def test_valid_prefit_estimator( - pack: Tuple[BaseEstimator, BaseEstimator] -) -> None: +def test_valid_prefit_estimator(pack: Tuple[BaseEstimator, BaseEstimator]) -> None: """Test that fitted estimators with prefit cv raise no errors.""" MapieEstimator, estimator = pack estimator.fit(X_toy, y_toy) @@ -161,9 +146,7 @@ def test_invalid_method(MapieEstimator: BaseEstimator, method: str) -> None: @pytest.mark.parametrize("MapieEstimator", MapieSimpleEstimators()) -@pytest.mark.parametrize( - "cv", [-3.14, -2, 0, 1, "cv", LinearRegression(), [1, 2]] -) +@pytest.mark.parametrize("cv", [-3.14, -2, 0, 1, "cv", LinearRegression(), [1, 2]]) def test_invalid_cv(MapieEstimator: BaseEstimator, cv: Any) -> None: """Test that invalid cv raise errors.""" mapie_estimator = MapieEstimator(cv=cv) @@ -188,8 +171,6 @@ def test_none_alpha_results(pack: Tuple[BaseEstimator, BaseEstimator]) -> None: @parametrize_with_checks([MapieRegressor()]) -def test_sklearn_compatible_estimator( - estimator: BaseEstimator, check: Any -) -> None: +def test_sklearn_compatible_estimator(estimator: BaseEstimator, check: Any) -> None: """Check compatibility with sklearn, using sklearn estimator checks API.""" check(estimator) diff --git a/mapie/tests/test_conformity_scores.py b/mapie/tests/test_conformity_scores.py index ad7d4fbc..e324c250 100644 --- a/mapie/tests/test_conformity_scores.py +++ b/mapie/tests/test_conformity_scores.py @@ -8,10 +8,12 @@ from sklearn.preprocessing import PolynomialFeatures from mapie._typing import ArrayLike, NDArray -from mapie.conformity_scores import (AbsoluteConformityScore, - ConformityScore, - GammaConformityScore, - ResidualNormalisedScore) +from mapie.conformity_scores import ( + AbsoluteConformityScore, + ConformityScore, + GammaConformityScore, + ResidualNormalisedScore, +) from mapie.regression import MapieRegressor X_toy = np.array([0, 1, 2, 3, 4, 5]).reshape(-1, 1) @@ -19,7 +21,7 @@ y_pred_list = [4, 7, 10, 12, 13, 12] conf_scores_list = [1, 0, -1, -1, 0, 3] conf_scores_gamma_list = [1 / 4, 0, -1 / 10, -1 / 12, 0, 3 / 12] -conf_scores_residual_norm_list = [0.2, 0., 0.11111111, 0.09090909, 0., 0.2] +conf_scores_residual_norm_list = [0.2, 0.0, 0.11111111, 0.09090909, 0.0, 0.2] random_state = 42 @@ -28,7 +30,10 @@ def __init__(self) -> None: super().__init__(sym=True, consistency_check=True) def get_signed_conformity_scores( - self, X: ArrayLike, y: ArrayLike, y_pred: ArrayLike, + self, + X: ArrayLike, + y: ArrayLike, + y_pred: ArrayLike, ) -> NDArray: return np.subtract(y, y_pred) @@ -55,12 +60,8 @@ def test_absolute_conformity_score_get_conformity_scores( ) -> None: """Test conformity score computation for AbsoluteConformityScore.""" abs_conf_score = AbsoluteConformityScore() - signed_conf_scores = abs_conf_score.get_signed_conformity_scores( - X_toy, y_toy, y_pred - ) - conf_scores = abs_conf_score.get_conformity_scores( - X_toy, y_toy, y_pred - ) + signed_conf_scores = abs_conf_score.get_signed_conformity_scores(X_toy, y_toy, y_pred) + conf_scores = abs_conf_score.get_conformity_scores(X_toy, y_toy, y_pred) expected_signed_conf_scores = np.array(conf_scores_list) expected_conf_scores = np.abs(expected_signed_conf_scores) np.testing.assert_allclose(signed_conf_scores, 
expected_signed_conf_scores) @@ -68,17 +69,13 @@ def test_absolute_conformity_score_get_conformity_scores( @pytest.mark.parametrize("y_pred", [np.array(y_pred_list), y_pred_list]) -@pytest.mark.parametrize( - "conf_scores", [np.array(conf_scores_list), conf_scores_list] -) +@pytest.mark.parametrize("conf_scores", [np.array(conf_scores_list), conf_scores_list]) def test_absolute_conformity_score_get_estimation_distribution( y_pred: NDArray, conf_scores: NDArray ) -> None: """Test conformity observed value computation for AbsoluteConformityScore.""" # noqa: E501 abs_conf_score = AbsoluteConformityScore() - y_obs = abs_conf_score.get_estimation_distribution( - X_toy, y_pred, conf_scores - ) + y_obs = abs_conf_score.get_estimation_distribution(X_toy, y_pred, conf_scores) np.testing.assert_allclose(y_obs, y_toy) @@ -86,12 +83,8 @@ def test_absolute_conformity_score_get_estimation_distribution( def test_absolute_conformity_score_consistency(y_pred: NDArray) -> None: """Test methods consistency for AbsoluteConformityScore.""" abs_conf_score = AbsoluteConformityScore() - signed_conf_scores = abs_conf_score.get_signed_conformity_scores( - X_toy, y_toy, y_pred - ) - y_obs = abs_conf_score.get_estimation_distribution( - X_toy, y_pred, signed_conf_scores - ) + signed_conf_scores = abs_conf_score.get_signed_conformity_scores(X_toy, y_toy, y_pred) + y_obs = abs_conf_score.get_estimation_distribution(X_toy, y_pred, signed_conf_scores) np.testing.assert_allclose(y_obs, y_toy) @@ -101,9 +94,7 @@ def test_gamma_conformity_score_get_conformity_scores( ) -> None: """Test conformity score computation for GammaConformityScore.""" gamma_conf_score = GammaConformityScore() - conf_scores = gamma_conf_score.get_conformity_scores( - X_toy, y_toy, y_pred - ) + conf_scores = gamma_conf_score.get_conformity_scores(X_toy, y_toy, y_pred) expected_signed_conf_scores = np.array(conf_scores_gamma_list) np.testing.assert_allclose(conf_scores, expected_signed_conf_scores) @@ -121,9 +112,7 @@ def test_gamma_conformity_score_get_estimation_distribution( ) -> None: """Test conformity observed value computation for GammaConformityScore.""" # noqa: E501 gamma_conf_score = GammaConformityScore() - y_obs = gamma_conf_score.get_estimation_distribution( - X_toy, y_pred, conf_scores - ) + y_obs = gamma_conf_score.get_estimation_distribution(X_toy, y_pred, conf_scores) np.testing.assert_allclose(y_obs, y_toy) @@ -131,12 +120,8 @@ def test_gamma_conformity_score_get_estimation_distribution( def test_gamma_conformity_score_consistency(y_pred: NDArray) -> None: """Test methods consistency for GammaConformityScore.""" gamma_conf_score = GammaConformityScore() - signed_conf_scores = gamma_conf_score.get_signed_conformity_scores( - X_toy, y_toy, y_pred - ) - y_obs = gamma_conf_score.get_estimation_distribution( - X_toy, y_pred, signed_conf_scores - ) + signed_conf_scores = gamma_conf_score.get_signed_conformity_scores(X_toy, y_toy, y_pred) + y_obs = gamma_conf_score.get_estimation_distribution(X_toy, y_pred, signed_conf_scores) np.testing.assert_allclose(y_obs, y_toy) @@ -149,15 +134,11 @@ def test_gamma_conformity_score_consistency(y_pred: NDArray) -> None: [1, -7, 9, 11, 13, 15], ], ) -def test_gamma_conformity_score_check_oberved_value( - y_pred: NDArray, y_toy: NDArray -) -> None: +def test_gamma_conformity_score_check_oberved_value(y_pred: NDArray, y_toy: NDArray) -> None: """Test methods consistency for GammaConformityScore.""" gamma_conf_score = GammaConformityScore() with pytest.raises(ValueError): - 
gamma_conf_score.get_signed_conformity_scores( - [], y_toy, y_pred - ) + gamma_conf_score.get_signed_conformity_scores([], y_toy, y_pred) @pytest.mark.parametrize( @@ -188,20 +169,10 @@ def test_gamma_conformity_score_check_predicted_value( ) -> None: """Test methods consistency for GammaConformityScore.""" gamma_conf_score = GammaConformityScore() - with pytest.raises( - ValueError, - match=r".*At least one of the predicted target is negative.*" - ): - gamma_conf_score.get_signed_conformity_scores( - X_toy, y_toy, y_pred - ) - with pytest.raises( - ValueError, - match=r".*At least one of the predicted target is negative.*" - ): - gamma_conf_score.get_estimation_distribution( - X_toy, y_pred, conf_scores - ) + with pytest.raises(ValueError, match=r".*At least one of the predicted target is negative.*"): + gamma_conf_score.get_signed_conformity_scores(X_toy, y_toy, y_pred) + with pytest.raises(ValueError, match=r".*At least one of the predicted target is negative.*"): + gamma_conf_score.get_estimation_distribution(X_toy, y_pred, conf_scores) def test_check_consistency() -> None: @@ -210,21 +181,14 @@ def test_check_consistency() -> None: and distributions raises an error. """ dummy_conf_score = DummyConformityScore() - conformity_scores = dummy_conf_score.get_signed_conformity_scores( - X_toy, y_toy, y_pred_list - ) - with pytest.raises( - ValueError, - match=r".*The two functions get_conformity_scores.*" - ): - dummy_conf_score.check_consistency( - X_toy, y_toy, y_pred_list, conformity_scores - ) + conformity_scores = dummy_conf_score.get_signed_conformity_scores(X_toy, y_toy, y_pred_list) + with pytest.raises(ValueError, match=r".*The two functions get_conformity_scores.*"): + dummy_conf_score.check_consistency(X_toy, y_toy, y_pred_list, conformity_scores) @pytest.mark.parametrize("y_pred", [np.array(y_pred_list), y_pred_list]) def test_residual_normalised_prefit_conformity_score_get_conformity_scores( - y_pred: NDArray + y_pred: NDArray, ) -> None: """ Test conformity score computation for ResidualNormalisedScore @@ -232,32 +196,22 @@ def test_residual_normalised_prefit_conformity_score_get_conformity_scores( """ residual_estimator = LinearRegression().fit(X_toy, y_toy) residual_norm_conf_score = ResidualNormalisedScore( - residual_estimator=residual_estimator, - prefit=True, - random_state=random_state - ) - conf_scores = residual_norm_conf_score.get_conformity_scores( - X_toy, y_toy, y_pred + residual_estimator=residual_estimator, prefit=True, random_state=random_state ) + conf_scores = residual_norm_conf_score.get_conformity_scores(X_toy, y_toy, y_pred) expected_signed_conf_scores = np.array(conf_scores_residual_norm_list) np.testing.assert_allclose(conf_scores, expected_signed_conf_scores) @pytest.mark.parametrize("y_pred", [np.array(y_pred_list), y_pred_list]) -def test_residual_normalised_conformity_score_get_conformity_scores( - y_pred: NDArray -) -> None: +def test_residual_normalised_conformity_score_get_conformity_scores(y_pred: NDArray) -> None: """ Test conformity score computation for ResidualNormalisedScore when prefit is False. 
""" residual_norm_score = ResidualNormalisedScore(random_state=random_state) - conf_scores = residual_norm_score.get_conformity_scores( - X_toy, y_toy, y_pred - ) - expected_signed_conf_scores = np.array( - [np.nan, np.nan, 1.e+08, 1.e+08, 0.e+00, 3.e+08] - ) + conf_scores = residual_norm_score.get_conformity_scores(X_toy, y_toy, y_pred) + expected_signed_conf_scores = np.array([np.nan, np.nan, 1.0e08, 1.0e08, 0.0e00, 3.0e08]) np.testing.assert_allclose(conf_scores, expected_signed_conf_scores) @@ -267,33 +221,26 @@ def test_residual_normalised_score_prefit_with_notfitted_estim() -> None: residual_estimator=LinearRegression(), prefit=True ) with pytest.raises(ValueError): - residual_norm_conf_score.get_conformity_scores( - X_toy, y_toy, y_pred_list - ) + residual_norm_conf_score.get_conformity_scores(X_toy, y_toy, y_pred_list) def test_residual_normalised_score_with_default_params() -> None: """Test that no error is raised with default parameters.""" residual_norm_score = ResidualNormalisedScore() - conf_scores = residual_norm_score.get_conformity_scores( - X_toy, y_toy, y_pred_list - ) + conf_scores = residual_norm_score.get_conformity_scores(X_toy, y_toy, y_pred_list) residual_norm_score.get_estimation_distribution(X_toy, y_toy, conf_scores) def test_invalid_estimator() -> None: """Test that an estimator without predict method raises an error.""" + class DumbEstimator: def __init__(self): pass - residual_norm_conf_score = ResidualNormalisedScore( - residual_estimator=DumbEstimator() - ) + residual_norm_conf_score = ResidualNormalisedScore(residual_estimator=DumbEstimator()) with pytest.raises(ValueError): - residual_norm_conf_score.get_conformity_scores( - X_toy, y_toy, y_pred_list - ) + residual_norm_conf_score.get_conformity_scores(X_toy, y_toy, y_pred_list) def test_cross_residual_normalised() -> None: @@ -301,9 +248,7 @@ def test_cross_residual_normalised() -> None: Test that residual normalised score with cross method raises an error. """ with pytest.raises(ValueError): - MapieRegressor(conformity_score=ResidualNormalisedScore()).fit( - X_toy, y_toy - ) + MapieRegressor(conformity_score=ResidualNormalisedScore()).fit(X_toy, y_toy) def test_residual_normalised_score_pipe() -> None: @@ -311,19 +256,13 @@ def test_residual_normalised_score_pipe() -> None: Test that residual normalised score function raises no error with a pipeline estimator. """ - pipe = Pipeline([ - ("poly", PolynomialFeatures(degree=2)), - ("linear", LinearRegression()) - ]) + pipe = Pipeline([("poly", PolynomialFeatures(degree=2)), ("linear", LinearRegression())]) mapie_reg = MapieRegressor( - conformity_score=ResidualNormalisedScore( - residual_estimator=pipe, split_size=0.2 - ), + conformity_score=ResidualNormalisedScore(residual_estimator=pipe, split_size=0.2), cv="split", - random_state=random_state + random_state=random_state, ) - mapie_reg.fit(np.concatenate((X_toy, X_toy)), - np.concatenate((y_toy, y_toy))) + mapie_reg.fit(np.concatenate((X_toy, X_toy)), np.concatenate((y_toy, y_toy))) def test_residual_normalised_score_pipe_prefit() -> None: @@ -331,17 +270,14 @@ def test_residual_normalised_score_pipe_prefit() -> None: Test that residual normalised score function raises no error with a pipeline estimator prefitted. 
""" - pipe = Pipeline([ - ("poly", PolynomialFeatures(degree=2)), - ("linear", LinearRegression()) - ]) + pipe = Pipeline([("poly", PolynomialFeatures(degree=2)), ("linear", LinearRegression())]) pipe.fit(X_toy, y_toy) mapie_reg = MapieRegressor( conformity_score=ResidualNormalisedScore( residual_estimator=pipe, split_size=0.2, prefit=True ), cv="split", - random_state=random_state + random_state=random_state, ) mapie_reg.fit(X_toy, y_toy) @@ -351,17 +287,15 @@ def test_residual_normalised_prefit_estimator_with_neg_values() -> None: Test that a prefit estimator for the residual estimator of the residual normalised score that predicts negative values raises a warning. """ + class NegativeRegresssor(LinearRegression): def predict(self, X): - return np.full(X.shape[0], fill_value=-1.) + return np.full(X.shape[0], fill_value=-1.0) + estim = NegativeRegresssor().fit(X_toy, y_toy) - residual_norm_conf_score = ResidualNormalisedScore( - residual_estimator=estim, prefit=True - ) + residual_norm_conf_score = ResidualNormalisedScore(residual_estimator=estim, prefit=True) with pytest.warns(UserWarning): - residual_norm_conf_score.get_conformity_scores( - X_toy, y_toy, y_pred_list - ) + residual_norm_conf_score.get_conformity_scores(X_toy, y_toy, y_pred_list) def test_residual_normalised_prefit_get_estimation_distribution() -> None: @@ -370,28 +304,17 @@ def test_residual_normalised_prefit_get_estimation_distribution() -> None: normalised score raises no error. """ estim = LinearRegression().fit(X_toy, y_toy) - residual_normalised_conf_score = ResidualNormalisedScore( - residual_estimator=estim, prefit=True - ) - conf_scores = residual_normalised_conf_score.get_conformity_scores( - X_toy, y_toy, y_pred_list - ) - residual_normalised_conf_score.get_estimation_distribution( - X_toy, y_pred_list, conf_scores - ) + residual_normalised_conf_score = ResidualNormalisedScore(residual_estimator=estim, prefit=True) + conf_scores = residual_normalised_conf_score.get_conformity_scores(X_toy, y_toy, y_pred_list) + residual_normalised_conf_score.get_estimation_distribution(X_toy, y_pred_list, conf_scores) -@pytest.mark.parametrize("score", [AbsoluteConformityScore(), - GammaConformityScore(), - ResidualNormalisedScore()]) +@pytest.mark.parametrize( + "score", [AbsoluteConformityScore(), GammaConformityScore(), ResidualNormalisedScore()] +) @pytest.mark.parametrize("alpha", [[0.3], [0.5, 0.4]]) -def test_intervals_shape_with_every_score( - score: ConformityScore, - alpha: Any -) -> None: - mapie_reg = MapieRegressor( - method="base", cv="split", conformity_score=score - ) +def test_intervals_shape_with_every_score(score: ConformityScore, alpha: Any) -> None: + mapie_reg = MapieRegressor(method="base", cv="split", conformity_score=score) X = np.concatenate((X_toy, X_toy)) y = np.concatenate((y_toy, y_toy)) mapie_reg = mapie_reg.fit(X, y) diff --git a/mapie/tests/test_control_risk.py b/mapie/tests/test_control_risk.py index c4a94f61..cd04b2f6 100644 --- a/mapie/tests/test_control_risk.py +++ b/mapie/tests/test_control_risk.py @@ -9,45 +9,30 @@ import numpy as np -from mapie.control_risk.risks import (compute_risk_precision, - compute_risk_recall) +from mapie.control_risk.risks import compute_risk_precision, compute_risk_recall from mapie.control_risk.p_values import compute_hoeffdding_bentkus_p_value -from mapie.control_risk.ltt import (ltt_procedure, - find_lambda_control_star) +from mapie.control_risk.ltt import ltt_procedure, find_lambda_control_star lambdas = np.array([0.5, 0.9]) -y_toy = np.stack([ - [1, 0, 1], - 
[0, 1, 0], - [1, 1, 0], - [1, 1, 1], -]) +y_toy = np.stack( + [ + [1, 0, 1], + [0, 1, 0], + [1, 1, 0], + [1, 1, 1], + ] +) -y_preds_proba = np.stack([ - [0.2, 0.6, 0.9], - [0.8, 0.2, 0.6], - [0.4, 0.8, 0.1], - [0.6, 0.8, 0.7] -]) +y_preds_proba = np.stack([[0.2, 0.6, 0.9], [0.8, 0.2, 0.6], [0.4, 0.8, 0.1], [0.6, 0.8, 0.7]]) y_preds_proba = np.expand_dims(y_preds_proba, axis=2) -test_recall = np.array([ - [1/2, 1.], - [1., 1.], - [1/2, 1.], - [0., 1.] -]) +test_recall = np.array([[1 / 2, 1.0], [1.0, 1.0], [1 / 2, 1.0], [0.0, 1.0]]) -test_precision = np.array([ - [1/2, 1.], - [1., 1.], - [0., 1.], - [0., 1.] -]) +test_precision = np.array([[1 / 2, 1.0], [1.0, 1.0], [0.0, 1.0], [0.0, 1.0]]) r_hat = np.array([0.5, 0.8]) @@ -59,10 +44,7 @@ wrong_alpha = 0 -wrong_alpha_shape = np.array([ - [0.1, 0.2], - [0.3, 0.4] -]) +wrong_alpha_shape = np.array([[0.1, 0.2], [0.3, 0.4]]) random_state = 42 prng = np.random.RandomState(random_state) @@ -123,9 +105,7 @@ def test_compute_precision_with_wrong_shape() -> None: with pytest.raises(ValueError, match=r".*y_pred_proba should be a 3d*"): compute_risk_precision(lambdas, y_preds_proba.squeeze(), y_toy) with pytest.raises(ValueError, match=r".*y should be a 2d*"): - compute_risk_precision( - lambdas, y_preds_proba, np.expand_dims(y_toy, 2) - ) + compute_risk_precision(lambdas, y_preds_proba, np.expand_dims(y_toy, 2)) with pytest.raises(ValueError, match=r".*could not be broadcast*"): compute_risk_precision(lambdas, y_preds_proba, y_toy[:-1]) @@ -150,10 +130,7 @@ def test_find_lambda_control_star() -> None: @pytest.mark.parametrize("delta", [0.1, 0.8]) @pytest.mark.parametrize("alpha", [[0.5], [0.6, 0.8]]) -def test_ltt_type_output_alpha_delta( - alpha: NDArray, - delta: float -) -> None: +def test_ltt_type_output_alpha_delta(alpha: NDArray, delta: float) -> None: """Test type output _ltt_procedure""" valid_index, p_values = ltt_procedure(r_hat, alpha, delta, n) assert isinstance(valid_index, list) @@ -169,9 +146,7 @@ def test_find_lambda_control_star_output(valid_index: List[List[int]]) -> None: def test_warning_valid_index_empty() -> None: """Test warning sent when empty list""" valid_index = [[]] # type: List[List[int]] - with pytest.warns( - UserWarning, match=r".*At least one sequence is empty*" - ): + with pytest.warns(UserWarning, match=r".*At least one sequence is empty*"): find_lambda_control_star(r_hat, valid_index, lambdas) diff --git a/mapie/tests/test_metrics.py b/mapie/tests/test_metrics.py index c09a5c47..2128cfaf 100644 --- a/mapie/tests/test_metrics.py +++ b/mapie/tests/test_metrics.py @@ -9,47 +9,53 @@ from typing_extensions import TypedDict from mapie._typing import ArrayLike, NDArray -from mapie.metrics import (add_jitter, - classification_coverage_score, - classification_coverage_score_v2, - classification_mean_width_score, - classification_ssc, - classification_ssc_score, - cumulative_differences, - expected_calibration_error, - hsic, - kolmogorov_smirnov_cdf, - kolmogorov_smirnov_p_value, - kolmogorov_smirnov_statistic, - kuiper_cdf, - kuiper_p_value, - kuiper_statistic, - length_scale, - regression_coverage_score, - regression_coverage_score_v2, - regression_mean_width_score, - regression_ssc, - regression_ssc_score, - sort_xy_by_y, - spiegelhalter_p_value, - spiegelhalter_statistic, - top_label_ece) +from mapie.metrics import ( + add_jitter, + classification_coverage_score, + classification_coverage_score_v2, + classification_mean_width_score, + classification_ssc, + classification_ssc_score, + cumulative_differences, + 
expected_calibration_error, + hsic, + kolmogorov_smirnov_cdf, + kolmogorov_smirnov_p_value, + kolmogorov_smirnov_statistic, + kuiper_cdf, + kuiper_p_value, + kuiper_statistic, + length_scale, + regression_coverage_score, + regression_coverage_score_v2, + regression_mean_width_score, + regression_ssc, + regression_ssc_score, + sort_xy_by_y, + spiegelhalter_p_value, + spiegelhalter_statistic, + top_label_ece, +) y_toy = np.array([5, 7.5, 9.5, 10.5, 12.5]) -y_preds = np.array([ - [5, 4, 6], - [7.5, 6.0, 9.0], - [9.5, 9, 10.0], - [10.5, 8.5, 12.5], - [11.5, 10.5, 12.0], -]) -intervals = np.array([ - [[4, 4], [6, 7.5]], - [[6.0, 8], [9.0, 10]], - [[9, 9], [10.0, 10.0]], - [[8.5, 9], [12.5, 12]], - [[10.5, 10.5], [12.0, 12]] -]) +y_preds = np.array( + [ + [5, 4, 6], + [7.5, 6.0, 9.0], + [9.5, 9, 10.0], + [10.5, 8.5, 12.5], + [11.5, 10.5, 12.0], + ] +) +intervals = np.array( + [ + [[4, 4], [6, 7.5]], + [[6.0, 8], [9.0, 10]], + [[9, 9], [10.0, 10.0]], + [[8.5, 9], [12.5, 12]], + [[10.5, 10.5], [12.0, 12]], + ] +) y_true_class = np.array([3, 3, 1, 2, 2]) y_pred_set = np.array( @@ -61,125 +67,78 @@ [False, True, False, True], ] ) -y_pred_set_2alphas = np.array([ - [ - [False, False], - [False, True], - [False, True], - [False, False], - ], - [ - [False, False], - [True, True], - [True, True], - [True, True] - ], +y_pred_set_2alphas = np.array( [ - [False, False], - [True, False], - [True, False], - [False, False] - ], - [ - [True, False], - [True, False], - [True, True], - [True, False], - ], - [ - [False, False], - [True, True], - [False, True], - [True, True] + [ + [False, False], + [False, True], + [False, True], + [False, False], + ], + [[False, False], [True, True], [True, True], [True, True]], + [[False, False], [True, False], [True, False], [False, False]], + [ + [True, False], + [True, False], + [True, True], + [True, False], + ], + [[False, False], [True, True], [False, True], [True, True]], ] -]) +) Params_ssc_reg = TypedDict( "Params_ssc_reg", - { - "y_intervals": NDArray, - "num_bins": int - }, + {"y_intervals": NDArray, "num_bins": int}, ) Params_ssc_classif = TypedDict( "Params_ssc_classif", - { - "y_pred_set": NDArray, - "num_bins": Union[int, None] - }, + {"y_pred_set": NDArray, "num_bins": Union[int, None]}, ) SSC_REG = { - "1alpha_base": Params_ssc_reg( - y_intervals=intervals[:, :, 0], - num_bins=2 - ), - "1alpha_3sp": Params_ssc_reg( - y_intervals=intervals[:, :, 0], - num_bins=3 - ), - "2alpha_base": Params_ssc_reg( - y_intervals=intervals, - num_bins=2 - ), - "2alpha_3sp": Params_ssc_reg( - y_intervals=intervals, - num_bins=3 - ), + "1alpha_base": Params_ssc_reg(y_intervals=intervals[:, :, 0], num_bins=2), + "1alpha_3sp": Params_ssc_reg(y_intervals=intervals[:, :, 0], num_bins=3), + "2alpha_base": Params_ssc_reg(y_intervals=intervals, num_bins=2), + "2alpha_3sp": Params_ssc_reg(y_intervals=intervals, num_bins=3), } SSC_CLASSIF = { - "1alpha_base": Params_ssc_classif( - y_pred_set=y_pred_set_2alphas[:, :, 0], - num_bins=2 - ), - "1alpha_3sp": Params_ssc_classif( - y_pred_set=y_pred_set_2alphas[:, :, 0], - num_bins=3 - ), - "1alpha_None": Params_ssc_classif( - y_pred_set=y_pred_set_2alphas[:, :, 0], - num_bins=None - ), + "1alpha_base": Params_ssc_classif(y_pred_set=y_pred_set_2alphas[:, :, 0], num_bins=2), + "1alpha_3sp": Params_ssc_classif(y_pred_set=y_pred_set_2alphas[:, :, 0], num_bins=3), + "1alpha_None": Params_ssc_classif(y_pred_set=y_pred_set_2alphas[:, :, 0], num_bins=None), "2alpha_base": Params_ssc_classif( y_pred_set=y_pred_set_2alphas, num_bins=2, ), - "2alpha_3sp": 
Params_ssc_classif( - y_pred_set=y_pred_set_2alphas, - num_bins=3 - ), - "2alpha_None": Params_ssc_classif( - y_pred_set=y_pred_set_2alphas, - num_bins=None - ), + "2alpha_3sp": Params_ssc_classif(y_pred_set=y_pred_set_2alphas, num_bins=3), + "2alpha_None": Params_ssc_classif(y_pred_set=y_pred_set_2alphas, num_bins=None), } SSC_REG_COVERAGES = { - "1alpha_base": np.array([[2/3, 1.]]), - "1alpha_3sp": np.array([[0.5, 1., 1.]]), - "2alpha_base": np.array([[2/3, 1.], [1/3, 1.]]), - "2alpha_3sp": np.array([[0.5, 1., 1.], [0.5, 0.5, 1.]]), + "1alpha_base": np.array([[2 / 3, 1.0]]), + "1alpha_3sp": np.array([[0.5, 1.0, 1.0]]), + "2alpha_base": np.array([[2 / 3, 1.0], [1 / 3, 1.0]]), + "2alpha_3sp": np.array([[0.5, 1.0, 1.0], [0.5, 0.5, 1.0]]), } SSC_REG_COVERAGES_SCORE = { - "1alpha_base": np.array([2/3]), + "1alpha_base": np.array([2 / 3]), "1alpha_3sp": np.array([0.5]), - "2alpha_base": np.array([2/3, 1/3]), + "2alpha_base": np.array([2 / 3, 1 / 3]), "2alpha_3sp": np.array([0.5, 0.5]), } SSC_CLASSIF_COVERAGES = { - "1alpha_base": np.array([[1/3, 1.]]), - "1alpha_3sp": np.array([[0., 2/3, 1.]]), - "1alpha_None": np.array([[0., np.nan, 0.5, 1., 1.]]), - "2alpha_base": np.array([[1/3, 1.], [1/3, 1.]]), - "2alpha_3sp": np.array([[0., 2/3, 1.], [0.5, 2/3, np.nan]]), - "2alpha_None": np.array([[0., np.nan, 0.5, 1., 1.], - [0., 1., 0., 1., np.nan]]), + "1alpha_base": np.array([[1 / 3, 1.0]]), + "1alpha_3sp": np.array([[0.0, 2 / 3, 1.0]]), + "1alpha_None": np.array([[0.0, np.nan, 0.5, 1.0, 1.0]]), + "2alpha_base": np.array([[1 / 3, 1.0], [1 / 3, 1.0]]), + "2alpha_3sp": np.array([[0.0, 2 / 3, 1.0], [0.5, 2 / 3, np.nan]]), + "2alpha_None": np.array([[0.0, np.nan, 0.5, 1.0, 1.0], [0.0, 1.0, 0.0, 1.0, np.nan]]), } SSC_CLASSIF_COVERAGES_SCORE = { "1alpha_base": np.array([1 / 3]), - "1alpha_3sp": np.array([0.]), - "1alpha_None": np.array([0.]), + "1alpha_3sp": np.array([0.0]), + "1alpha_None": np.array([0.0]), "2alpha_base": np.array([1 / 3, 1 / 3]), - "2alpha_3sp": np.array([0., 0.5]), - "2alpha_None": np.array([0., 0.]), + "2alpha_3sp": np.array([0.0, 0.5]), + "2alpha_None": np.array([0.0, 0.0]), } prng = RandomState(1234567890) @@ -238,11 +197,26 @@ def test_regression_same_length() -> None: with pytest.raises(ValueError, match=r".*y should be a 1d array*"): regression_mean_width_score(y_preds[:, :2], y_preds[:, 2]) with pytest.raises(ValueError, match=r".*shape mismatch*"): - regression_ssc(y_toy, intervals[:-1, ]) + regression_ssc( + y_toy, + intervals[ + :-1, + ], + ) with pytest.raises(ValueError, match=r".*shape mismatch*"): - regression_ssc_score(y_toy, intervals[:-1, ]) + regression_ssc_score( + y_toy, + intervals[ + :-1, + ], + ) with pytest.raises(ValueError, match=r".*shape mismatch*"): - hsic(y_toy, intervals[:-1, ]) + hsic( + y_toy, + intervals[ + :-1, + ], + ) def test_regression_toydata_coverage_score() -> None: @@ -272,14 +246,11 @@ def test_regression_ypredup_type_coverage_score() -> None: def test_classification_y_true_shape() -> None: """Test shape of y_true.""" with pytest.raises(ValueError, match=r".*y should be a 1d array*"): - classification_coverage_score( - np.tile(y_true_class, (2, 1)), y_pred_set - ) + classification_coverage_score(np.tile(y_true_class, (2, 1)), y_pred_set) with pytest.raises(ValueError, match=r".*y should be a 1d array*"): classification_ssc(np.tile(y_true_class, (2, 1)), y_pred_set_2alphas) with pytest.raises(ValueError, match=r".*y should be a 1d array*"): - classification_ssc_score(np.tile(y_true_class, (2, 1)), - y_pred_set_2alphas) + 
classification_ssc_score(np.tile(y_true_class, (2, 1)), y_pred_set_2alphas) def test_classification_y_pred_set_shape() -> None: @@ -374,7 +345,7 @@ def test_ece_scores() -> None: def test_top_lable_ece() -> None: - """Test that score is """ + """Test that score is""" scr = top_label_ece(y_true, y_scores) assert np.round(scr, 4) == 0.6997 @@ -384,36 +355,23 @@ def test_top_label_same_result() -> None: Test that we have the same results if the input contais the maximum with the argmax values or if it is the probabilities """ - pred_proba_ = np.array( - [ - [0.2, 0.2, 0.4], - [0.5, 0.3, 0.2], - [0, 0.4, 0.6], - [0.1, 0.7, 0.2] - ] - ) + pred_proba_ = np.array([[0.2, 0.2, 0.4], [0.5, 0.3, 0.2], [0, 0.4, 0.6], [0.1, 0.7, 0.2]]) y_true_ = np.array([1, 0, 2, 1]) pred_max_ = np.max(pred_proba_, axis=1) pred_argmax_ = np.argmax(pred_proba_, axis=1) scr1 = top_label_ece(y_true_, pred_proba_) - scr2 = top_label_ece( - y_true_, - pred_max_, - y_score_arg=pred_argmax_ - ) + scr2 = top_label_ece(y_true_, pred_max_, y_score_arg=pred_argmax_) - classes = np.unique([y_true_+1]) + classes = np.unique([y_true_ + 1]) scr3 = top_label_ece( - y_true_+1, + y_true_ + 1, pred_proba_, classes=classes, ) scr4 = top_label_ece( - y_true_+1, - np.max(pred_proba_, axis=1), - classes[np.argmax(pred_proba_, axis=1)] + y_true_ + 1, np.max(pred_proba_, axis=1), classes[np.argmax(pred_proba_, axis=1)] ) assert scr1 == scr2 assert scr1 == scr3 @@ -485,8 +443,7 @@ def test_invalid_splits_classification_ssc(num_bins: int) -> None: def test_invalid_splits_classification_ssc_score(num_bins: int) -> None: """Test that invalid number of bins for ssc raise errors.""" with pytest.raises(ValueError): - classification_ssc_score(y_true_class, y_pred_set_2alphas, - num_bins=num_bins) + classification_ssc_score(y_true_class, y_pred_set_2alphas, num_bins=num_bins) @pytest.mark.parametrize("num_bins", [3, 2, None]) @@ -498,8 +455,7 @@ def test_valid_splits_classification_ssc(num_bins: int) -> None: @pytest.mark.parametrize("num_bins", [3, 2, None]) def test_valid_splits_classification_ssc_score(num_bins: int) -> None: """Test that valid number of bins for ssc raise no error.""" - classification_ssc_score(y_true_class, y_pred_set_2alphas, - num_bins=num_bins) + classification_ssc_score(y_true_class, y_pred_set_2alphas, num_bins=num_bins) @pytest.mark.parametrize("params", [*SSC_CLASSIF]) @@ -512,8 +468,7 @@ def test_classification_ssc_return_shape(params: str) -> None: @pytest.mark.parametrize("params", [*SSC_CLASSIF]) def test_classification_ssc_score_return_shape(params: str) -> None: """Test that the arrays returned by ssc metrics have the correct shape.""" - cond_cov_min = classification_ssc_score(y_true_class, - **SSC_CLASSIF[params]) + cond_cov_min = classification_ssc_score(y_true_class, **SSC_CLASSIF[params]) assert cond_cov_min.shape == SSC_CLASSIF_COVERAGES_SCORE[params].shape @@ -527,11 +482,8 @@ def test_classification_ssc_coverage_values(params: str) -> None: @pytest.mark.parametrize("params", [*SSC_CLASSIF]) def test_classification_ssc_score_coverage_values(params: str) -> None: """Test that the conditional coverage values returned are correct.""" - cond_cov_min = classification_ssc_score(y_true_class, - **SSC_CLASSIF[params]) - np.testing.assert_allclose( - cond_cov_min, SSC_CLASSIF_COVERAGES_SCORE[params] - ) + cond_cov_min = classification_ssc_score(y_true_class, **SSC_CLASSIF[params]) + np.testing.assert_allclose(cond_cov_min, SSC_CLASSIF_COVERAGES_SCORE[params]) @pytest.mark.parametrize("kernel_sizes", [[1], 2, [[1, 2]], 
[-1, 1], [1, -1]]) @@ -565,9 +517,7 @@ def test_regression_coverage_v1andv2() -> None: Test that ``regression_coverage_score`` and ```regression_coverage_score_v2``` returns the same results """ - cov_v1 = regression_coverage_score( - y_toy, intervals[:, 0, 0], intervals[:, 1, 0] - ) + cov_v1 = regression_coverage_score(y_toy, intervals[:, 0, 0], intervals[:, 1, 0]) cov_v2 = regression_coverage_score_v2(np.expand_dims(y_toy, 1), intervals) np.testing.assert_allclose(cov_v1, cov_v2[0]) @@ -580,9 +530,7 @@ def test_regression_coverage_score_v2_ytrue_valid_shape() -> None: def test_regression_coverage_score_v2_intervals_invalid_shape() -> None: """Test that an error is raised if intervals has not the good shape.""" with pytest.raises(ValueError): - regression_coverage_score_v2( - np.expand_dims(y_toy, 1), intervals[:, 0, 0] - ) + regression_coverage_score_v2(np.expand_dims(y_toy, 1), intervals[:, 0, 0]) def test_classification_coverage_v1andv2() -> None: @@ -592,8 +540,7 @@ def test_classification_coverage_v1andv2() -> None: """ cov_v1 = classification_coverage_score(y_true_class, y_pred_set) cov_v2 = classification_coverage_score_v2( - np.expand_dims(y_true_class, axis=1), - np.expand_dims(y_pred_set, axis=2) + np.expand_dims(y_true_class, axis=1), np.expand_dims(y_pred_set, axis=2) ) np.testing.assert_allclose(cov_v1, cov_v2[0]) @@ -606,9 +553,7 @@ def test_classification_coverage_score_v2_ytrue_valid_shape() -> None: def test_classification_coverage_score_v2_ypredset_invalid_shape() -> None: """Test that an error is raised if y_pred_set has not the good shape.""" with pytest.raises(ValueError): - classification_coverage_score_v2( - np.expand_dims(y_true_class, axis=1), y_pred_set[:, 0] - ) + classification_coverage_score_v2(np.expand_dims(y_true_class, axis=1), y_pred_set[:, 0]) @pytest.mark.parametrize("amplitude", [0.1, 0.01, 0.001]) @@ -616,7 +561,7 @@ def test_add_jitter_amplitude(amplitude: float) -> None: """Test that noise perturbation is consistent with required amplitude""" x = np.array([0, 1, 2, 3, 4]) x_jittered = add_jitter(x, noise_amplitude=amplitude, random_state=1) - np.testing.assert_allclose(x, x_jittered, rtol=5*amplitude) + np.testing.assert_allclose(x, x_jittered, rtol=5 * amplitude) def test_sort_xy_by_y() -> None: diff --git a/mapie/tests/test_multi_label_classification.py b/mapie/tests/test_multi_label_classification.py index ad21c027..0e39fc2d 100644 --- a/mapie/tests/test_multi_label_classification.py +++ b/mapie/tests/test_multi_label_classification.py @@ -22,12 +22,12 @@ "method": str, "bound": Optional[str], "random_state": Optional[int], - "metric_control": Optional[str] - } + "metric_control": Optional[str], + }, ) METHODS = ["crc", "rcps", "ltt"] -METRICS = ['recall', 'precision'] +METRICS = ["recall", "precision"] BOUNDS = ["wsr", "hoeffding", "bernstein"] random_state = 42 @@ -38,45 +38,22 @@ STRATEGIES = { - "crc": ( - Params( - method="crc", - bound=None, - random_state=random_state, - metric_control="recall" - ), - ), + "crc": (Params(method="crc", bound=None, random_state=random_state, metric_control="recall"),), "rcps_wsr": ( - Params( - method="rcps", - bound="wsr", - random_state=random_state, - metric_control='recall' - ), + Params(method="rcps", bound="wsr", random_state=random_state, metric_control="recall"), ), "rcps_hoeffding": ( Params( - method="rcps", - bound="hoeffding", - random_state=random_state, - metric_control='recall' + method="rcps", bound="hoeffding", random_state=random_state, metric_control="recall" ), ), "rcps_bernstein": ( 
Params( - method="rcps", - bound="bernstein", - random_state=random_state, - metric_control='recall' + method="rcps", bound="bernstein", random_state=random_state, metric_control="recall" ), ), "ltt": ( - Params( - method="ltt", - bound=None, - random_state=random_state, - metric_control='precision' - ), + Params(method="ltt", bound=None, random_state=random_state, metric_control="precision"), ), } @@ -91,7 +68,7 @@ [True, True, True], [True, True, True], [True, True, True], - [False, True, True] + [False, True, True], ], "rcps_bernstein": [ [True, True, True], @@ -136,12 +113,11 @@ [False, False, False], [False, False, False], [False, False, False], - ] + ], } class WrongOutputModel: - def __init__(self): pass @@ -153,7 +129,6 @@ def predict(self, *args: Any): class ArrayOutputModel: - def __init__(self): self.trained_ = True @@ -161,12 +136,12 @@ def fit(self, *args: Any) -> None: """Dummy fit.""" def predict_proba(self, X: NDArray, *args: Any) -> NDArray: - probas = np.array([[.9, .05, .05]]) + probas = np.array([[0.9, 0.05, 0.05]]) proba_out = np.repeat(probas, len(X), axis=0) return proba_out def predict(self, X: NDArray, *args: Any) -> NDArray: - return self.predict_proba(X) >= .3 + return self.predict_proba(X) >= 0.3 def __sklearn_is_fitted__(self): return True @@ -175,24 +150,24 @@ def __sklearn_is_fitted__(self): X_toy = np.arange(9).reshape(-1, 1) y_toy = np.stack( [ - [1, 0, 1], [1, 0, 0], [0, 1, 1], - [0, 1, 0], [0, 0, 1], [1, 1, 1], - [1, 1, 0], [1, 0, 1], [0, 1, 1] + [1, 0, 1], + [1, 0, 0], + [0, 1, 1], + [0, 1, 0], + [0, 0, 1], + [1, 1, 1], + [1, 1, 0], + [1, 0, 1], + [0, 1, 1], ] ) X, y = make_multilabel_classification( - n_samples=1000, - n_classes=5, - random_state=random_state, - allow_unlabeled=False + n_samples=1000, n_classes=5, random_state=random_state, allow_unlabeled=False ) X_no_label, y_no_label = make_multilabel_classification( - n_samples=1000, - n_classes=5, - random_state=random_state, - allow_unlabeled=True + n_samples=1000, n_classes=5, random_state=random_state, allow_unlabeled=True ) @@ -204,10 +179,7 @@ def test_initialized() -> None: def test_valid_estimator() -> None: """Test that valid estimators are not corrupted, for all strategies.""" clf = MultiOutputClassifier(LogisticRegression()).fit(X_toy, y_toy) - mapie_clf = MapieMultiLabelClassifier( - estimator=clf, - random_state=random_state - ) + mapie_clf = MapieMultiLabelClassifier(estimator=clf, random_state=random_state) mapie_clf.fit(X_toy, y_toy) assert isinstance(mapie_clf.single_estimator_, MultiOutputClassifier) @@ -224,8 +196,7 @@ def test_valid_metric_method(strategy: str) -> None: """Test that valid metric raise no errors""" args = STRATEGIES[strategy][0] mapie_clf = MapieMultiLabelClassifier( - random_state=random_state, - metric_control=args["metric_control"] + random_state=random_state, metric_control=args["metric_control"] ) mapie_clf.fit(X_toy, y_toy) check_is_fitted(mapie_clf, mapie_clf.fit_attributes) @@ -234,34 +205,25 @@ def test_valid_metric_method(strategy: str) -> None: @pytest.mark.parametrize("bound", BOUNDS) def test_valid_bound(bound: str) -> None: """Test that valid methods raise no errors.""" - mapie_clf = MapieMultiLabelClassifier( - random_state=random_state, method="rcps" - ) + mapie_clf = MapieMultiLabelClassifier(random_state=random_state, method="rcps") mapie_clf.fit(X_toy, y_toy) - mapie_clf.predict(X_toy, bound=bound, delta=.1) + mapie_clf.predict(X_toy, bound=bound, delta=0.1) check_is_fitted(mapie_clf, mapie_clf.fit_attributes) 
@pytest.mark.parametrize("strategy", [*STRATEGIES]) @pytest.mark.parametrize("alpha", [0.2, [0.2, 0.3], (0.2, 0.3)]) @pytest.mark.parametrize("delta", [0.2, 0.1, 0.05]) -def test_predict_output_shape( - strategy: str, alpha: Any, delta: Any -) -> None: +def test_predict_output_shape(strategy: str, alpha: Any, delta: Any) -> None: """Test predict output shape.""" args = STRATEGIES[strategy][0] mapie_clf = MapieMultiLabelClassifier( method=args["method"], metric_control=args["metric_control"], - random_state=args["random_state"] + random_state=args["random_state"], ) mapie_clf.fit(X, y) - y_pred, y_ps = mapie_clf.predict( - X, - alpha=alpha, - bound=args["bound"], - delta=.1 - ) + y_pred, y_ps = mapie_clf.predict(X, alpha=alpha, bound=args["bound"], delta=0.1) n_alpha = len(alpha) if hasattr(alpha, "__len__") else 1 assert y_pred.shape == y.shape assert y_ps.shape == (y.shape[0], y.shape[1], n_alpha) @@ -277,15 +239,10 @@ def test_results_for_same_alpha(strategy: str) -> None: mapie_clf = MapieMultiLabelClassifier( method=args["method"], metric_control=args["metric_control"], - random_state=args["random_state"] + random_state=args["random_state"], ) mapie_clf.fit(X, y) - _, y_ps = mapie_clf.predict( - X, - alpha=[0.1, 0.1], - bound=args["bound"], - delta=.1 - ) + _, y_ps = mapie_clf.predict(X, alpha=[0.1, 0.1], bound=args["bound"], delta=0.1) np.testing.assert_allclose(y_ps[:, 0, 0], y_ps[:, 0, 1]) np.testing.assert_allclose(y_ps[:, 1, 0], y_ps[:, 1, 1]) @@ -302,7 +259,7 @@ def test_results_for_partial_fit(strategy: str) -> None: estimator=clf, method=args["method"], metric_control=args["metric_control"], - random_state=args["random_state"] + random_state=args["random_state"], ) mapie_clf.fit(X, y) @@ -310,64 +267,38 @@ def test_results_for_partial_fit(strategy: str) -> None: estimator=clf, method=args["method"], metric_control=args["metric_control"], - random_state=args["random_state"] + random_state=args["random_state"], ) for i in range(len(X)): - mapie_clf_partial.partial_fit( - X[i][np.newaxis, :], - y[i][np.newaxis, :] - ) - - y_pred, y_ps = mapie_clf.predict( - X, - alpha=[0.1, 0.1], - bound=args["bound"], - delta=.1 - ) + mapie_clf_partial.partial_fit(X[i][np.newaxis, :], y[i][np.newaxis, :]) + + y_pred, y_ps = mapie_clf.predict(X, alpha=[0.1, 0.1], bound=args["bound"], delta=0.1) y_pred_partial, y_ps_partial = mapie_clf_partial.predict( - X, - alpha=[0.1, 0.1], - bound=args["bound"], - delta=.1 + X, alpha=[0.1, 0.1], bound=args["bound"], delta=0.1 ) np.testing.assert_allclose(y_pred, y_pred_partial) np.testing.assert_allclose(y_ps, y_ps_partial) @pytest.mark.parametrize("strategy", [*STRATEGIES]) -@pytest.mark.parametrize( - "alpha", [np.array([0.05, 0.1]), [0.05, 0.1], (0.05, 0.1)] -) -def test_results_for_alpha_as_float_and_arraylike( - strategy: str, alpha: Any -) -> None: +@pytest.mark.parametrize("alpha", [np.array([0.05, 0.1]), [0.05, 0.1], (0.05, 0.1)]) +def test_results_for_alpha_as_float_and_arraylike(strategy: str, alpha: Any) -> None: """Test that output values do not depend on type of alpha.""" args = STRATEGIES[strategy][0] mapie_clf = MapieMultiLabelClassifier( method=args["method"], metric_control=args["metric_control"], - random_state=args["random_state"] + random_state=args["random_state"], ) mapie_clf.fit(X, y) y_pred_float1, y_ps_float1 = mapie_clf.predict( - X, - alpha=alpha[0], - bound=args["bound"], - delta=.9 + X, alpha=alpha[0], bound=args["bound"], delta=0.9 ) y_pred_float2, y_ps_float2 = mapie_clf.predict( - X, - alpha=alpha[1], - bound=args["bound"], - 
delta=.9 - ) - y_pred_array, y_ps_array = mapie_clf.predict( - X, - alpha=alpha, - bound=args["bound"], - delta=.9 + X, alpha=alpha[1], bound=args["bound"], delta=0.9 ) + y_pred_array, y_ps_array = mapie_clf.predict(X, alpha=alpha, bound=args["bound"], delta=0.9) np.testing.assert_allclose(y_pred_float1, y_pred_array) np.testing.assert_allclose(y_pred_float2, y_pred_array) np.testing.assert_allclose(y_ps_float1[:, :, 0], y_ps_array[:, :, 0]) @@ -382,70 +313,58 @@ def test_results_single_and_multi_jobs(strategy: str) -> None: """ args = STRATEGIES[strategy][0] mapie_clf_single = MapieMultiLabelClassifier( - n_jobs=1, - metric_control=args["metric_control"], - random_state=args["random_state"] + n_jobs=1, metric_control=args["metric_control"], random_state=args["random_state"] ) mapie_clf_multi = MapieMultiLabelClassifier( - n_jobs=-1, - metric_control=args["metric_control"], - random_state=args["random_state"] + n_jobs=-1, metric_control=args["metric_control"], random_state=args["random_state"] ) mapie_clf_single.fit(X, y) mapie_clf_multi.fit(X, y) y_pred_single, y_ps_single = mapie_clf_single.predict( - X, - alpha=0.2, - bound=args["bound"], - delta=.9 + X, alpha=0.2, bound=args["bound"], delta=0.9 ) y_pred_multi, y_ps_multi = mapie_clf_multi.predict( - X, - alpha=0.2, - bound=args["bound"], - delta=.9 + X, alpha=0.2, bound=args["bound"], delta=0.9 ) np.testing.assert_allclose(y_pred_single, y_pred_multi) np.testing.assert_allclose(y_ps_single, y_ps_multi) @pytest.mark.parametrize( - "alpha", [[0.2, 0.8], (0.2, 0.8), np.array([0.2, 0.8]), None], + "alpha", + [[0.2, 0.8], (0.2, 0.8), np.array([0.2, 0.8]), None], ) @pytest.mark.parametrize( - "delta", [.1, .2, .5, .9, .001], + "delta", + [0.1, 0.2, 0.5, 0.9, 0.001], ) @pytest.mark.parametrize( - "bound", BOUNDS, + "bound", + BOUNDS, ) def test_valid_prediction(alpha: Any, delta: Any, bound: Any) -> None: """Test fit and predict.""" - model = MultiOutputClassifier( - LogisticRegression(multi_class="multinomial") - ) + model = MultiOutputClassifier(LogisticRegression(multi_class="multinomial")) model.fit(X_toy, y_toy) mapie_clf = MapieMultiLabelClassifier( - estimator=model, method="rcps", - random_state=random_state + estimator=model, method="rcps", random_state=random_state ) mapie_clf.fit(X_toy, y_toy) - mapie_clf.predict( - X_toy, - alpha=alpha, - bound=bound, - delta=delta - ) + mapie_clf.predict(X_toy, alpha=alpha, bound=bound, delta=delta) @pytest.mark.parametrize( - "alpha", [[0.2, 0.8], (0.2, 0.8), np.array([0.2, 0.8]), None], + "alpha", + [[0.2, 0.8], (0.2, 0.8), np.array([0.2, 0.8]), None], ) @pytest.mark.parametrize( - "delta", [.1, .2, .5, .9, .001], + "delta", + [0.1, 0.2, 0.5, 0.9, 0.001], ) @pytest.mark.parametrize( - "bound", BOUNDS, + "bound", + BOUNDS, ) @pytest.mark.parametrize("strategy", [*STRATEGIES]) def test_array_output_model(strategy: str, alpha: Any, delta: Any, bound: Any): @@ -455,22 +374,15 @@ def test_array_output_model(strategy: str, alpha: Any, delta: Any, bound: Any): estimator=model, method=args["method"], metric_control=args["metric_control"], - random_state=random_state + random_state=random_state, ) mapie_clf.fit(X_toy, y_toy) - mapie_clf.predict( - X_toy, - alpha=alpha, - bound=bound, - delta=delta - ) + mapie_clf.predict(X_toy, alpha=alpha, bound=bound, delta=delta) def test_reinit_new_fit(): clf = MultiOutputClassifier(LogisticRegression()).fit(X_toy, y_toy) - mapie_clf = MapieMultiLabelClassifier( - estimator=clf, random_state=random_state - ) + mapie_clf = MapieMultiLabelClassifier(estimator=clf, 
random_state=random_state) mapie_clf.fit(X_toy, y_toy) mapie_clf.fit(X_toy, y_toy) assert len(mapie_clf.risks) == len(X_toy) @@ -479,9 +391,7 @@ def test_reinit_new_fit(): @pytest.mark.parametrize("method", WRONG_METHODS) def test_method_error_in_fit(method: str) -> None: """Test error for wrong method""" - mapie_clf = MapieMultiLabelClassifier( - random_state=random_state, method=method - ) + mapie_clf = MapieMultiLabelClassifier(random_state=random_state, method=method) with pytest.raises(ValueError, match=r".*Invalid method.*"): mapie_clf.fit(X_toy, y_toy) @@ -496,13 +406,8 @@ def test_method_error_if_no_label_fit() -> None: def test_method_error_if_no_label_partial_fit() -> None: """Test error for wrong method""" - clf = MultiOutputClassifier(LogisticRegression()).fit( - X_no_label, - y_no_label - ) - mapie_clf = MapieMultiLabelClassifier( - estimator=clf, random_state=random_state - ) + clf = MultiOutputClassifier(LogisticRegression()).fit(X_no_label, y_no_label) + mapie_clf = MapieMultiLabelClassifier(estimator=clf, random_state=random_state) with pytest.raises(ValueError, match=r".*Invalid y.*"): mapie_clf.partial_fit(X_no_label, y_no_label) @@ -510,31 +415,24 @@ def test_method_error_if_no_label_partial_fit() -> None: @pytest.mark.parametrize("bound", WRONG_BOUNDS) def test_bound_error_in_predict(bound: str) -> None: """Test error for wrong bounds""" - mapie_clf = MapieMultiLabelClassifier( - random_state=random_state, method='rcps' - ) + mapie_clf = MapieMultiLabelClassifier(random_state=random_state, method="rcps") mapie_clf.fit(X_toy, y_toy) with pytest.raises(ValueError, match=r".*bound must be in.*"): - mapie_clf.predict(X_toy, bound=bound, delta=.1) + mapie_clf.predict(X_toy, bound=bound, delta=0.1) @pytest.mark.parametrize("metric_control", WRONG_METRICS) def test_metric_error_in_fit(metric_control: str) -> None: """Test error for wrong metrics""" - mapie_clf = MapieMultiLabelClassifier( - random_state=random_state, - metric_control=metric_control - ) + mapie_clf = MapieMultiLabelClassifier(random_state=random_state, metric_control=metric_control) with pytest.raises(ValueError, match=r".*Invalid metric. *"): mapie_clf.fit(X_toy, y_toy) def test_error_rcps_delta_null() -> None: """Test error for RCPS method and delta None""" - mapie_clf = MapieMultiLabelClassifier( - random_state=random_state, method='rcps' - ) + mapie_clf = MapieMultiLabelClassifier(random_state=random_state, method="rcps") mapie_clf.fit(X_toy, y_toy) with pytest.raises(ValueError, match=r".*delta cannot be ``None``*"): @@ -543,33 +441,25 @@ def test_error_rcps_delta_null() -> None: def test_error_ltt_delta_null() -> None: """Test error for LTT method and delta None""" - mapie_clf = MapieMultiLabelClassifier( - random_state=random_state, - metric_control='precision' - ) + mapie_clf = MapieMultiLabelClassifier(random_state=random_state, metric_control="precision") mapie_clf.fit(X_toy, y_toy) with pytest.raises(ValueError, match=r".*Invalid delta. 
*"): mapie_clf.predict(X_toy) -@pytest.mark.parametrize("delta", [-1., 0, 1, 4, -3]) +@pytest.mark.parametrize("delta", [-1.0, 0, 1, 4, -3]) def test_error_delta_wrong_value(delta: Any) -> None: """Test error for RCPS method and delta None""" - mapie_clf = MapieMultiLabelClassifier( - random_state=random_state, method='rcps' - ) + mapie_clf = MapieMultiLabelClassifier(random_state=random_state, method="rcps") mapie_clf.fit(X_toy, y_toy) with pytest.raises(ValueError, match=r".*delta must be*"): mapie_clf.predict(X_toy, delta=delta) -@pytest.mark.parametrize("delta", [-1., 0, 1, 4, -3]) +@pytest.mark.parametrize("delta", [-1.0, 0, 1, 4, -3]) def test_error_delta_wrong_value_ltt(delta: Any) -> None: """Test error for RCPS method and delta None""" - mapie_clf = MapieMultiLabelClassifier( - random_state=random_state, - metric_control='precision' - ) + mapie_clf = MapieMultiLabelClassifier(random_state=random_state, metric_control="precision") mapie_clf.fit(X_toy, y_toy) with pytest.raises(ValueError, match=r".*delta must be*"): @@ -578,9 +468,7 @@ def test_error_delta_wrong_value_ltt(delta: Any) -> None: def test_bound_none_crc() -> None: """Test that a warning is raised nound is not None with CRC method.""" - mapie_clf = MapieMultiLabelClassifier( - random_state=random_state, method="crc" - ) + mapie_clf = MapieMultiLabelClassifier(random_state=random_state, method="crc") mapie_clf.fit(X_toy, y_toy) with pytest.warns(UserWarning, match=r"WARNING: you are using crc*"): @@ -589,12 +477,10 @@ def test_bound_none_crc() -> None: def test_delta_none_crc() -> None: """Test that a warning is raised nound is not None with CRC method.""" - mapie_clf = MapieMultiLabelClassifier( - random_state=random_state, method="crc" - ) + mapie_clf = MapieMultiLabelClassifier(random_state=random_state, method="crc") mapie_clf.fit(X_toy, y_toy) with pytest.warns(UserWarning, match=r"WARNING: you are using crc*"): - mapie_clf.predict(X_toy, bound=None, delta=.1) + mapie_clf.predict(X_toy, bound=None, delta=0.1) def test_warning_estimator_none() -> None: @@ -604,24 +490,19 @@ def test_warning_estimator_none() -> None: mapie_clf.fit(X_toy, y_toy) -@pytest.mark.parametrize("delta", [np.arange(0, 1, 0.01), (.1, .2), [.4, .5]]) +@pytest.mark.parametrize("delta", [np.arange(0, 1, 0.01), (0.1, 0.2), [0.4, 0.5]]) def test_error_delta_wrong_type(delta: Any) -> None: """Test error for RCPS method and delta None""" - mapie_clf = MapieMultiLabelClassifier( - random_state=random_state, method="rcps" - ) + mapie_clf = MapieMultiLabelClassifier(random_state=random_state, method="rcps") mapie_clf.fit(X_toy, y_toy) with pytest.raises(ValueError, match=r".*delta must be a float*"): mapie_clf.predict(X_toy, delta=delta) -@pytest.mark.parametrize("delta", [np.arange(0, 1, 0.01), (.1, .2), [.4, .5]]) +@pytest.mark.parametrize("delta", [np.arange(0, 1, 0.01), (0.1, 0.2), [0.4, 0.5]]) def test_error_delta_wrong_type_ltt(delta: Any) -> None: """Test error for LTT method and delta None""" - mapie_clf = MapieMultiLabelClassifier( - random_state=random_state, - metric_control="precision" - ) + mapie_clf = MapieMultiLabelClassifier(random_state=random_state, metric_control="precision") mapie_clf.fit(X_toy, y_toy) with pytest.raises(ValueError, match=r".*delta must be a float*"): @@ -631,9 +512,7 @@ def test_error_delta_wrong_type_ltt(delta: Any) -> None: def test_error_partial_fit_different_size() -> None: """Test error for partial_fit with different size""" clf = MultiOutputClassifier(LogisticRegression()).fit(X_toy, y_toy) - mapie_clf = 
MapieMultiLabelClassifier( - estimator=clf, random_state=random_state - ) + mapie_clf = MapieMultiLabelClassifier(estimator=clf, random_state=random_state) mapie_clf.partial_fit(X_toy, y_toy) with pytest.raises(ValueError, match=r".*Number of features*"): mapie_clf.partial_fit(X, y) @@ -649,65 +528,43 @@ def test_pipeline_compatibility(strategy: str) -> None: "x_num": [0, 1, 1, 4, np.nan, 5], } ) - y = np.array( - [ - [0, 0, 1], [0, 0, 1], - [1, 1, 0], [1, 0, 1], - [1, 0, 1], [1, 1, 1] - ] - ) + y = np.array([[0, 0, 1], [0, 0, 1], [1, 1, 0], [1, 0, 1], [1, 0, 1], [1, 1, 1]]) numeric_preprocessor = Pipeline( [ ("imputer", SimpleImputer(strategy="mean")), ] ) categorical_preprocessor = Pipeline( - steps=[ - ("encoding", OneHotEncoder(handle_unknown="ignore")) - ] + steps=[("encoding", OneHotEncoder(handle_unknown="ignore"))] ) preprocessor = ColumnTransformer( - [ - ("cat", categorical_preprocessor, ["x_cat"]), - ("num", numeric_preprocessor, ["x_num"]) - ] - ) - pipe = make_pipeline( - preprocessor, - MultiOutputClassifier(LogisticRegression()) + [("cat", categorical_preprocessor, ["x_cat"]), ("num", numeric_preprocessor, ["x_num"])] ) + pipe = make_pipeline(preprocessor, MultiOutputClassifier(LogisticRegression())) pipe.fit(X, y) mapie = MapieMultiLabelClassifier( estimator=pipe, method=args["method"], metric_control=args["metric_control"], - random_state=random_state + random_state=random_state, ) mapie.fit(X, y) - mapie.predict(X, bound=args["bound"], delta=.1) + mapie.predict(X, bound=args["bound"], delta=0.1) def test_error_no_fit() -> None: """Test error for no fit""" clf = WrongOutputModel() - mapie_clf = MapieMultiLabelClassifier( - estimator=clf, random_state=random_state - ) - with pytest.raises( - ValueError, - match=r".*Please provide a classifier with*" - ): + mapie_clf = MapieMultiLabelClassifier(estimator=clf, random_state=random_state) + with pytest.raises(ValueError, match=r".*Please provide a classifier with*"): mapie_clf.fit(X_toy, y_toy) def test_error_estimator_none_partial() -> None: """Test error estimator none partial""" mapie_clf = MapieMultiLabelClassifier(random_state=random_state) - with pytest.raises( - ValueError, - match=r".*Invalid estimator with partial_fit*" - ): + with pytest.raises(ValueError, match=r".*Invalid estimator with partial_fit*"): mapie_clf.partial_fit(X_toy, y_toy) @@ -718,9 +575,7 @@ def test_partial_fit_first_time(): def test_partial_fit_second_time(): clf = MultiOutputClassifier(LogisticRegression()).fit(X, y) - mclf = MapieMultiLabelClassifier( - estimator=clf, random_state=random_state - ) + mclf = MapieMultiLabelClassifier(estimator=clf, random_state=random_state) mclf.partial_fit(X, y) assert not mclf._check_partial_fit_first_call() @@ -736,20 +591,11 @@ def test_toy_dataset_predictions(strategy: str) -> None: clf, method=args["method"], metric_control=args["metric_control"], - random_state=random_state + random_state=random_state, ) mapie_clf.fit(X_toy, y_toy) - _, y_ps = mapie_clf.predict( - X_toy, - alpha=.2, - bound=args["bound"], - delta=.1 - ) - np.testing.assert_allclose( - y_ps[:, :, 0], - y_toy_mapie[strategy], - rtol=1e-6 - ) + _, y_ps = mapie_clf.predict(X_toy, alpha=0.2, bound=args["bound"], delta=0.1) + np.testing.assert_allclose(y_ps[:, :, 0], y_toy_mapie[strategy], rtol=1e-6) @pytest.mark.parametrize("method", ["rcps", "crc"]) @@ -759,15 +605,8 @@ def test_error_wrong_method_metric_precision(method: str) -> None: with invalid method . 
""" clf = MultiOutputClassifier(LogisticRegression()).fit(X_toy, y_toy) - mapie_clf = MapieMultiLabelClassifier( - clf, - method=method, - metric_control="precision" - ) - with pytest.raises( - ValueError, - match=r".*Invalid method for metric*" - ): + mapie_clf = MapieMultiLabelClassifier(clf, method=method, metric_control="precision") + with pytest.raises(ValueError, match=r".*Invalid method for metric*"): mapie_clf.fit(X_toy, y_toy) @@ -778,33 +617,20 @@ def test_check_metric_control(method: str) -> None: with invalid method . """ clf = MultiOutputClassifier(LogisticRegression()).fit(X_toy, y_toy) - mapie_clf = MapieMultiLabelClassifier( - clf, - method=method, - metric_control="recall" - ) - with pytest.raises( - ValueError, - match=r".*Invalid method for metric*" - ): + mapie_clf = MapieMultiLabelClassifier(clf, method=method, metric_control="recall") + with pytest.raises(ValueError, match=r".*Invalid method for metric*"): mapie_clf.fit(X_toy, y_toy) def test_method_none_precision() -> None: clf = MultiOutputClassifier(LogisticRegression()).fit(X_toy, y_toy) - mapie_clf = MapieMultiLabelClassifier( - clf, - metric_control="precision" - ) + mapie_clf = MapieMultiLabelClassifier(clf, metric_control="precision") mapie_clf.fit(X_toy, y_toy) assert mapie_clf.method == "ltt" def test_method_none_recall() -> None: clf = MultiOutputClassifier(LogisticRegression()).fit(X_toy, y_toy) - mapie_clf = MapieMultiLabelClassifier( - clf, - metric_control="recall" - ) + mapie_clf = MapieMultiLabelClassifier(clf, metric_control="recall") mapie_clf.fit(X_toy, y_toy) assert mapie_clf.method == "crc" diff --git a/mapie/tests/test_quantile_regression.py b/mapie/tests/test_quantile_regression.py index e37a176a..dadac514 100644 --- a/mapie/tests/test_quantile_regression.py +++ b/mapie/tests/test_quantile_regression.py @@ -22,41 +22,25 @@ from mapie.metrics import regression_coverage_score from mapie.regression import MapieQuantileRegressor -X_toy = np.array( - [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, - 5, 0, 1, 2, 3, 4, 5] -).reshape(-1, 1) +X_toy = np.array([0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5]).reshape( + -1, 1 +) y_toy = np.array( - [5, 7, 9, 11, 13, 15, 5, 7, 9, 11, 13, 15, 5, 7, 9, - 11, 13, 15, 5, 7, 9, 11, 13, 15] + [5, 7, 9, 11, 13, 15, 5, 7, 9, 11, 13, 15, 5, 7, 9, 11, 13, 15, 5, 7, 9, 11, 13, 15] ) random_state = 1 X_train_toy, X_calib_toy, y_train_toy, y_calib_toy = train_test_split( - X_toy, - y_toy, - test_size=0.5, - random_state=random_state + X_toy, y_toy, test_size=0.5, random_state=random_state ) qt = QuantileRegressor(solver="highs-ds") -gb = GradientBoostingRegressor( - loss="quantile", - random_state=random_state - ) - -X, y = make_regression( - n_samples=500, - n_features=10, - noise=1.0, - random_state=random_state - ) +gb = GradientBoostingRegressor(loss="quantile", random_state=random_state) + +X, y = make_regression(n_samples=500, n_features=10, noise=1.0, random_state=random_state) X_train, X_calib, y_train, y_calib = train_test_split( - X, - y, - test_size=0.5, - random_state=random_state + X, y, test_size=0.5, random_state=random_state ) SYMMETRY = [True, False] @@ -75,7 +59,7 @@ "quantile_alpha3": Params(method="quantile", alpha=0.3), "quantile_alpha4": Params(method="quantile", alpha=0.4), "quantile_alpha8": Params(method="quantile", alpha=0.8), - } +} WIDTHS = { "quantile_alpha2": 2.7360884795455576, @@ -131,21 +115,13 @@ def test_default_parameters() -> None: def test_default_sample_weight() -> None: """Test default sample 
weights.""" mapie_reg = MapieQuantileRegressor() - assert ( - signature(mapie_reg.fit).parameters["sample_weight"].default - is None - ) + assert signature(mapie_reg.fit).parameters["sample_weight"].default is None def test_default_parameters_estimator() -> None: """Test default values of estimator.""" mapie_reg = MapieQuantileRegressor() - mapie_reg.fit( - X_train, - y_train, - X_calib=X_calib, - y_calib=y_calib - ) + mapie_reg.fit(X_train, y_train, X_calib=X_calib, y_calib=y_calib) for estimator in mapie_reg.estimators_: assert isinstance(estimator, QuantileRegressor) assert estimator.__dict__["solver"] == "highs-ds" @@ -157,15 +133,8 @@ def test_no_predict_fit_estimator() -> None: ValueError, match=r".*Invalid estimator.*", ): - mapie_reg = MapieQuantileRegressor( - estimator=NotFitPredictEstimator(alpha=0.2) - ) - mapie_reg.fit( - X_train_toy, - y_train_toy, - X_calib=X_calib_toy, - y_calib=y_calib_toy - ) + mapie_reg = MapieQuantileRegressor(estimator=NotFitPredictEstimator(alpha=0.2)) + mapie_reg.fit(X_train_toy, y_train_toy, X_calib=X_calib_toy, y_calib=y_calib_toy) def test_no_para_loss_estimator() -> None: @@ -175,21 +144,12 @@ def test_no_para_loss_estimator() -> None: match=r".*The matching parameter `loss_name`*", ): mapie_reg = MapieQuantileRegressor() - mapie_reg.quantile_estimator_params[ - "NoLossPamameterEstimator" - ] = { + mapie_reg.quantile_estimator_params["NoLossPamameterEstimator"] = { "loss_name": "noloss", - "alpha_name": "alpha" + "alpha_name": "alpha", } - mapie_reg.estimator = NoLossPamameterEstimator( - alpha=0.2 - ) - mapie_reg.fit( - X_train_toy, - y_train_toy, - X_calib=X_calib_toy, - y_calib=y_calib_toy - ) + mapie_reg.estimator = NoLossPamameterEstimator(alpha=0.2) + mapie_reg.fit(X_train_toy, y_train_toy, X_calib=X_calib_toy, y_calib=y_calib_toy) def test_no_para_alpha_estimator() -> None: @@ -199,61 +159,39 @@ def test_no_para_alpha_estimator() -> None: match=r".*The matching parameter `alpha_name`*", ): mapie_reg = MapieQuantileRegressor() - mapie_reg.quantile_estimator_params[ - "NoAlphaPamameterEstimator" - ] = { + mapie_reg.quantile_estimator_params["NoAlphaPamameterEstimator"] = { "loss_name": "loss", - "alpha_name": "noalpha" + "alpha_name": "noalpha", } - mapie_reg.estimator = NoAlphaPamameterEstimator( - alpha=0.2, - loss="quantile" - ) - mapie_reg.fit( - X_train_toy, - y_train_toy, - X_calib=X_calib_toy, - y_calib=y_calib_toy - ) + mapie_reg.estimator = NoAlphaPamameterEstimator(alpha=0.2, loss="quantile") + mapie_reg.fit(X_train_toy, y_train_toy, X_calib=X_calib_toy, y_calib=y_calib_toy) @pytest.mark.parametrize("strategy", [*STRATEGIES]) @pytest.mark.parametrize("estimator", ESTIMATOR) def test_valid_method(strategy: str, estimator: RegressorMixin) -> None: """Test that valid strategies and estimators raise no error""" - mapie_reg = MapieQuantileRegressor( - estimator=estimator, - **STRATEGIES[strategy] - ) - mapie_reg.fit( - X_train_toy, - y_train_toy, - X_calib=X_calib_toy, - y_calib=y_calib_toy - ) + mapie_reg = MapieQuantileRegressor(estimator=estimator, **STRATEGIES[strategy]) + mapie_reg.fit(X_train_toy, y_train_toy, X_calib=X_calib_toy, y_calib=y_calib_toy) check_is_fitted(mapie_reg, mapie_reg.fit_attributes) assert mapie_reg.__dict__["method"] == "quantile" @pytest.mark.parametrize("strategy", [*STRATEGIES]) @pytest.mark.parametrize("estimator", ESTIMATOR) -@pytest.mark.parametrize("dataset", [ - (X_train, X_calib, y_train, y_calib), - (X_train_toy, X_calib_toy, y_train_toy, y_calib_toy) - ] +@pytest.mark.parametrize( + "dataset", + 
[(X_train, X_calib, y_train, y_calib), (X_train_toy, X_calib_toy, y_train_toy, y_calib_toy)], ) @pytest.mark.parametrize("symmetry", SYMMETRY) def test_predict_output_shape( strategy: str, estimator: RegressorMixin, dataset: Tuple[NDArray, NDArray, NDArray, NDArray], - symmetry: bool + symmetry: bool, ) -> None: """Test predict output shape.""" - mapie_reg = MapieQuantileRegressor( - estimator=estimator, - **STRATEGIES[strategy] - ) + mapie_reg = MapieQuantileRegressor(estimator=estimator, **STRATEGIES[strategy]) (X_t, X_c, y_t, y_c) = dataset mapie_reg.fit(X_t, y_t, X_calib=X_c, y_calib=y_c) y_pred, y_pis = mapie_reg.predict(X_t, symmetry=symmetry) @@ -271,49 +209,24 @@ def test_results_with_constant_sample_weights( or constant with different values. """ n_samples = len(X_train) - mapie0 = MapieQuantileRegressor( - estimator=qt, - **STRATEGIES[strategy] - ) - mapie1 = MapieQuantileRegressor( - estimator=qt, - **STRATEGIES[strategy] - ) - mapie2 = MapieQuantileRegressor( - estimator=qt, - **STRATEGIES[strategy] - ) - mapie0.fit( - X_train, - y_train, - X_calib=X_calib, - y_calib=y_calib, - sample_weight=None - ) + mapie0 = MapieQuantileRegressor(estimator=qt, **STRATEGIES[strategy]) + mapie1 = MapieQuantileRegressor(estimator=qt, **STRATEGIES[strategy]) + mapie2 = MapieQuantileRegressor(estimator=qt, **STRATEGIES[strategy]) + mapie0.fit(X_train, y_train, X_calib=X_calib, y_calib=y_calib, sample_weight=None) mapie1.fit( - X_train, - y_train, - X_calib=X_calib, - y_calib=y_calib, - sample_weight=np.ones(shape=n_samples) - ) + X_train, y_train, X_calib=X_calib, y_calib=y_calib, sample_weight=np.ones(shape=n_samples) + ) mapie2.fit( X_train, y_train, X_calib=X_calib, y_calib=y_calib, - sample_weight=np.ones(shape=n_samples) * 5 - ) - - np.testing.assert_allclose( - mapie0.conformity_scores_, - mapie1.conformity_scores_ - ) - np.testing.assert_allclose( - mapie0.conformity_scores_, - mapie2.conformity_scores_ + sample_weight=np.ones(shape=n_samples) * 5, ) + np.testing.assert_allclose(mapie0.conformity_scores_, mapie1.conformity_scores_) + np.testing.assert_allclose(mapie0.conformity_scores_, mapie2.conformity_scores_) + y_pred0, y_pis0 = mapie0.predict(X) y_pred1, y_pis1 = mapie1.predict(X) y_pred2, y_pis2 = mapie2.predict(X) @@ -325,18 +238,12 @@ def test_results_with_constant_sample_weights( @pytest.mark.parametrize("estimator", ESTIMATOR) @pytest.mark.parametrize("symmetry", SYMMETRY) -def test_results_for_same_alpha( - estimator: RegressorMixin, - symmetry: bool -) -> None: +def test_results_for_same_alpha(estimator: RegressorMixin, symmetry: bool) -> None: """ Test that predictions and intervals are similar with two equal values of alpha. 
""" - mapie_reg = MapieQuantileRegressor( - estimator=estimator, - alpha=0.2 - ) + mapie_reg = MapieQuantileRegressor(estimator=estimator, alpha=0.2) mapie_reg_clone = clone(mapie_reg) mapie_reg.fit(X_train, y_train, X_calib=X_calib, y_calib=y_calib) mapie_reg_clone.fit(X_train, y_train, X_calib=X_calib, y_calib=y_calib) @@ -375,9 +282,7 @@ def test_estimators_quantile_function() -> None: ValueError, match=r".*You need to set the loss/objective*", ): - mapie_reg = MapieQuantileRegressor( - estimator=GradientBoostingRegressor() - ) + mapie_reg = MapieQuantileRegressor(estimator=GradientBoostingRegressor()) mapie_reg.fit(X_train, y_train, X_calib=X_calib, y_calib=y_calib) @@ -389,24 +294,14 @@ def test_invalid_cv(cv: Any) -> None: match=r".*Invalid cv method.*", ): mapie = MapieQuantileRegressor(cv=cv) - mapie.fit( - X_train_toy, - y_train_toy, - X_calib=X_calib_toy, - y_calib=y_calib_toy - ) + mapie.fit(X_train_toy, y_train_toy, X_calib=X_calib_toy, y_calib=y_calib_toy) @pytest.mark.parametrize("cv", [None, "split"]) def test_valid_cv(cv: Any) -> None: """Test that valid cv raise no errors.""" mapie = MapieQuantileRegressor(cv=cv) - mapie.fit( - X_train_toy, - y_train_toy, - X_calib=X_calib_toy, - y_calib=y_calib_toy - ) + mapie.fit(X_train_toy, y_train_toy, X_calib=X_calib_toy, y_calib=y_calib_toy) def test_calib_dataset_is_none() -> None: @@ -452,12 +347,7 @@ def test_estimators_not_in_list(est: RegressorMixin) -> None: match=r".*The base model does not seem to be accepted by.*", ): mapie_reg = MapieQuantileRegressor(estimator=est) - mapie_reg.fit( - X_train_toy, - y_train_toy, - X_calib=X_calib_toy, - y_calib=y_calib_toy - ) + mapie_reg.fit(X_train_toy, y_train_toy, X_calib=X_calib_toy, y_calib=y_calib_toy) def test_for_small_dataset() -> None: @@ -466,24 +356,21 @@ def test_for_small_dataset() -> None: ValueError, match=r".*Number of samples of the score is too low*", ): - mapie_reg = MapieQuantileRegressor( - estimator=qt, - alpha=0.1 - ) + mapie_reg = MapieQuantileRegressor(estimator=qt, alpha=0.1) mapie_reg.fit( np.array([1, 2, 3]), np.array([2, 2, 3]), X_calib=np.array([3, 5]), - y_calib=np.array([2, 3]) + y_calib=np.array([2, 3]), ) @pytest.mark.parametrize("strategy", [*STRATEGIES]) @pytest.mark.parametrize("estimator", ESTIMATOR) -@pytest.mark.parametrize("dataset", [ - (X_train, X_calib, y_train, y_calib), - (X_train_toy, X_calib_toy, y_train_toy, y_calib_toy) -]) +@pytest.mark.parametrize( + "dataset", + [(X_train, X_calib, y_train, y_calib), (X_train_toy, X_calib_toy, y_train_toy, y_calib_toy)], +) def test_conformity_len( strategy: str, estimator: RegressorMixin, @@ -492,10 +379,7 @@ def test_conformity_len( """Test conformity scores output shape.""" (X_t, X_c, y_t, y_c) = dataset n_samples = int(len(X_c)) - mapie_regressor = MapieQuantileRegressor( - estimator=estimator, - **STRATEGIES[strategy] - ) + mapie_regressor = MapieQuantileRegressor(estimator=estimator, **STRATEGIES[strategy]) mapie_regressor.fit(X_t, y_t, X_calib=X_c, y_calib=y_c) assert mapie_regressor.conformity_scores_[0].shape[0] == n_samples @@ -530,14 +414,8 @@ def test_quantile_prefit_three_estimators() -> None: gb_trained1.fit(X_train, y_train) gb_trained2.fit(X_train, y_train) list_estimators = [gb_trained1, gb_trained2] - mapie_reg = MapieQuantileRegressor( - estimator=list_estimators, - cv="prefit" - ) - mapie_reg.fit( - X_calib, - y_calib - ) + mapie_reg = MapieQuantileRegressor(estimator=list_estimators, cv="prefit") + mapie_reg.fit(X_calib, y_calib) def test_prefit_no_fit_predict() -> None: @@ -553,15 
+431,8 @@ def test_prefit_no_fit_predict() -> None: gb_trained2.fit(X_train, y_train) gb_trained3 = 3 list_estimators = [gb_trained1, gb_trained2, gb_trained3] - mapie_reg = MapieQuantileRegressor( - estimator=list_estimators, - cv="prefit", - alpha=0.3 - ) - mapie_reg.fit( - X_calib, - y_calib - ) + mapie_reg = MapieQuantileRegressor(estimator=list_estimators, cv="prefit", alpha=0.3) + mapie_reg.fit(X_calib, y_calib) def test_non_trained_estimator() -> None: @@ -576,15 +447,8 @@ def test_non_trained_estimator() -> None: gb_trained1.fit(X_train, y_train) gb_trained2.fit(X_train, y_train) list_estimators = [gb_trained1, gb_trained2, gb_trained3] - mapie_reg = MapieQuantileRegressor( - estimator=list_estimators, - cv="prefit", - alpha=0.3 - ) - mapie_reg.fit( - X_calib, - y_calib - ) + mapie_reg = MapieQuantileRegressor(estimator=list_estimators, cv="prefit", alpha=0.3) + mapie_reg.fit(X_calib, y_calib) def test_warning_alpha_prefit() -> None: @@ -592,23 +456,15 @@ def test_warning_alpha_prefit() -> None: Check that the user is warned that the alphas need to be correctly set. """ with pytest.warns( - UserWarning, - match=r".*WARNING: The alpha that is set needs to be the same*" + UserWarning, match=r".*WARNING: The alpha that is set needs to be the same*" ): gb_trained1, gb_trained2, gb_trained3 = clone(gb), clone(gb), clone(gb) gb_trained1.fit(X_train, y_train) gb_trained2.fit(X_train, y_train) gb_trained3.fit(X_train, y_train) list_estimators = [gb_trained1, gb_trained2, gb_trained3] - mapie_reg = MapieQuantileRegressor( - estimator=list_estimators, - cv="prefit", - alpha=0.3 - ) - mapie_reg.fit( - X_calib, - y_calib - ) + mapie_reg = MapieQuantileRegressor(estimator=list_estimators, cv="prefit", alpha=0.3) + mapie_reg.fit(X_calib, y_calib) @pytest.mark.parametrize("alpha", [0.05, 0.1, 0.2, 0.3]) @@ -618,25 +474,18 @@ def test_prefit_and_non_prefit_equal(alpha: float) -> None: are found. """ list_estimators = [] - alphas_ = [alpha/2, 1-(alpha/2), 0.5] + alphas_ = [alpha / 2, 1 - (alpha / 2), 0.5] for alpha_ in alphas_: est = clone(qt) params = {"quantile": alpha_} est.set_params(**params) est.fit(X_train, y_train) list_estimators.append(est) - mapie_reg_prefit = MapieQuantileRegressor( - estimator=list_estimators, - cv="prefit", - alpha=alpha - ) + mapie_reg_prefit = MapieQuantileRegressor(estimator=list_estimators, cv="prefit", alpha=alpha) mapie_reg_prefit.fit(X_calib, y_calib) y_pred_prefit, y_pis_prefit = mapie_reg_prefit.predict(X) - mapie_reg = MapieQuantileRegressor( - estimator=qt, - alpha=alpha - ) + mapie_reg = MapieQuantileRegressor(estimator=qt, alpha=alpha) mapie_reg.fit(X_train, y_train, X_calib=X_calib, y_calib=y_calib) y_pred, y_pis = mapie_reg.predict(X) @@ -651,7 +500,7 @@ def test_prefit_different_type_list_tuple_array(alpha: float) -> None: estimators gives similar results. 
""" list_estimators = [] - alphas_ = [alpha/2, 1-(alpha/2), 0.5] + alphas_ = [alpha / 2, 1 - (alpha / 2), 0.5] for alpha_ in alphas_: est = clone(qt) params = {"quantile": alpha_} @@ -660,25 +509,19 @@ def test_prefit_different_type_list_tuple_array(alpha: float) -> None: list_estimators.append(est) mapie_reg_prefit_list = MapieQuantileRegressor( - estimator=list_estimators, - cv="prefit", - alpha=alpha + estimator=list_estimators, cv="prefit", alpha=alpha ) mapie_reg_prefit_list.fit(X_calib, y_calib) y_pred_prefit_list, y_pis_prefit_list = mapie_reg_prefit_list.predict(X) mapie_reg_prefit_tuple = MapieQuantileRegressor( - estimator=tuple(list_estimators), - cv="prefit", - alpha=alpha + estimator=tuple(list_estimators), cv="prefit", alpha=alpha ) mapie_reg_prefit_tuple.fit(X_calib, y_calib) y_pred_prefit_tuple, y_pis_prefit_tuple = mapie_reg_prefit_tuple.predict(X) mapie_reg_prefit_array = MapieQuantileRegressor( - estimator=np.array(list_estimators), - cv="prefit", - alpha=alpha + estimator=np.array(list_estimators), cv="prefit", alpha=alpha ) mapie_reg_prefit_array.fit(X_calib, y_calib) y_pred_prefit_array, y_pis_prefit_array = mapie_reg_prefit_array.predict(X) @@ -697,15 +540,12 @@ def test_pipeline_compatibility(estimator: RegressorMixin) -> None: { "x_cat": ["A", "A", "B", "A", "A", "B", "A", "B", "B", "B"], "x_num": [0, 1, 1, 4, np.nan, 5, 4, 3, np.nan, 3], - "y": [5, 7, 3, 9, 10, 8, 9, 7, 9, 8] + "y": [5, 7, 3, 9, 10, 8, 9, 7, 9, 8], } ) y = pd.Series([5, 7, 3, 9, 10, 8, 9, 7, 10, 5]) X_train_toy, X_calib_toy, y_train_toy, y_calib_toy = train_test_split( - X, - y, - test_size=0.5, - random_state=random_state + X, y, test_size=0.5, random_state=random_state ) numeric_preprocessor = Pipeline( [ @@ -713,24 +553,14 @@ def test_pipeline_compatibility(estimator: RegressorMixin) -> None: ] ) categorical_preprocessor = Pipeline( - steps=[ - ("encoding", OneHotEncoder(handle_unknown="ignore")) - ] + steps=[("encoding", OneHotEncoder(handle_unknown="ignore"))] ) preprocessor = ColumnTransformer( - [ - ("cat", categorical_preprocessor, ["x_cat"]), - ("num", numeric_preprocessor, ["x_num"]) - ] + [("cat", categorical_preprocessor, ["x_cat"]), ("num", numeric_preprocessor, ["x_num"])] ) pipe = make_pipeline(preprocessor, estimator) mapie = MapieQuantileRegressor(pipe, alpha=0.4) - mapie.fit( - X_train_toy, - y_train_toy, - X_calib=X_calib_toy, - y_calib=y_calib_toy - ) + mapie.fit(X_train_toy, y_train_toy, X_calib=X_calib_toy, y_calib=y_calib_toy) mapie.predict(X) @@ -738,11 +568,9 @@ def test_deprecated_path_warning() -> None: """ Test that a warning is raised if import with deprecated path. 
""" - with pytest.warns( - FutureWarning, - match=r".*WARNING: Deprecated path*" - ): + with pytest.warns(FutureWarning, match=r".*WARNING: Deprecated path*"): from mapie.quantile_regression import MapieQuantileRegressor + _ = MapieQuantileRegressor() diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py index 14557982..ad7aa333 100644 --- a/mapie/tests/test_regression.py +++ b/mapie/tests/test_regression.py @@ -11,8 +11,7 @@ from sklearn.dummy import DummyRegressor from sklearn.impute import SimpleImputer from sklearn.linear_model import LinearRegression -from sklearn.model_selection import (KFold, LeaveOneOut, ShuffleSplit, - train_test_split) +from sklearn.model_selection import KFold, LeaveOneOut, ShuffleSplit, train_test_split from sklearn.pipeline import Pipeline, make_pipeline from sklearn.preprocessing import OneHotEncoder from sklearn.utils.validation import check_is_fitted @@ -20,10 +19,12 @@ from mapie._typing import NDArray from mapie.aggregation_functions import aggregate_all -from mapie.conformity_scores import (AbsoluteConformityScore, - ConformityScore, - GammaConformityScore, - ResidualNormalisedScore) +from mapie.conformity_scores import ( + AbsoluteConformityScore, + ConformityScore, + GammaConformityScore, + ResidualNormalisedScore, +) from mapie.metrics import regression_coverage_score from mapie.regression import MapieRegressor from mapie.estimator.estimator import EnsembleRegressor @@ -31,9 +32,7 @@ X_toy = np.array([0, 1, 2, 3, 4, 5]).reshape(-1, 1) y_toy = np.array([5, 7, 9, 11, 13, 15]) -X, y = make_regression( - n_samples=500, n_features=10, noise=1.0, random_state=1 -) +X, y = make_regression(n_samples=500, n_features=10, noise=1.0, random_state=1) k = np.ones(shape=(5, X.shape[1])) METHODS = ["naive", "base", "plus", "minmax"] @@ -51,81 +50,61 @@ ) STRATEGIES = { "naive": Params( - method="naive", - agg_function="median", - cv=None, - test_size=None, - random_state=random_state + method="naive", agg_function="median", cv=None, test_size=None, random_state=random_state ), "split": Params( - method="base", - agg_function="median", - cv="split", - test_size=0.5, - random_state=random_state + method="base", agg_function="median", cv="split", test_size=0.5, random_state=random_state ), "jackknife": Params( - method="base", - agg_function="mean", - cv=-1, - test_size=None, - random_state=random_state + method="base", agg_function="mean", cv=-1, test_size=None, random_state=random_state ), "jackknife_plus": Params( - method="plus", - agg_function="mean", - cv=-1, - test_size=None, - random_state=random_state + method="plus", agg_function="mean", cv=-1, test_size=None, random_state=random_state ), "jackknife_minmax": Params( - method="minmax", - agg_function="mean", - cv=-1, - test_size=None, - random_state=random_state + method="minmax", agg_function="mean", cv=-1, test_size=None, random_state=random_state ), "cv": Params( method="base", agg_function="mean", cv=KFold(n_splits=3, shuffle=True, random_state=random_state), test_size=None, - random_state=random_state + random_state=random_state, ), "cv_plus": Params( method="plus", agg_function="mean", cv=KFold(n_splits=3, shuffle=True, random_state=random_state), test_size=None, - random_state=random_state + random_state=random_state, ), "cv_minmax": Params( method="minmax", agg_function="mean", cv=KFold(n_splits=3, shuffle=True, random_state=random_state), test_size=None, - random_state=random_state + random_state=random_state, ), "jackknife_plus_ab": Params( method="plus", agg_function="mean", 
cv=Subsample(n_resamplings=30, random_state=random_state), test_size=None, - random_state=random_state + random_state=random_state, ), "jackknife_minmax_ab": Params( method="minmax", agg_function="mean", cv=Subsample(n_resamplings=30, random_state=random_state), test_size=None, - random_state=random_state + random_state=random_state, ), "jackknife_plus_median_ab": Params( method="plus", agg_function="median", cv=Subsample(n_resamplings=30, random_state=random_state), test_size=None, - random_state=random_state + random_state=random_state, ), } @@ -172,9 +151,7 @@ def test_default_parameters() -> None: @pytest.mark.parametrize("strategy", [*STRATEGIES]) def test_valid_estimator(strategy: str) -> None: """Test that valid estimators are not corrupted, for all strategies.""" - mapie_reg = MapieRegressor( - estimator=DummyRegressor(), **STRATEGIES[strategy] - ) + mapie_reg = MapieRegressor(estimator=DummyRegressor(), **STRATEGIES[strategy]) mapie_reg.fit(X_toy, y_toy) assert isinstance(mapie_reg.estimator_.single_estimator_, DummyRegressor) for estimator in mapie_reg.estimator_.estimators_: @@ -210,8 +187,7 @@ def test_valid_agg_function(agg_function: str) -> None: @pytest.mark.parametrize( - "cv", [None, -1, 2, KFold(), LeaveOneOut(), - ShuffleSplit(n_splits=1), "prefit", "split"] + "cv", [None, -1, 2, KFold(), LeaveOneOut(), ShuffleSplit(n_splits=1), "prefit", "split"] ) def test_valid_cv(cv: Any) -> None: """Test that valid cv raise no errors.""" @@ -236,9 +212,7 @@ def test_too_large_cv(cv: Any) -> None: @pytest.mark.parametrize("strategy", [*STRATEGIES]) @pytest.mark.parametrize("dataset", [(X, y), (X_toy, y_toy)]) @pytest.mark.parametrize("alpha", [0.2, [0.2, 0.4], (0.2, 0.4)]) -def test_predict_output_shape( - strategy: str, alpha: Any, dataset: Tuple[NDArray, NDArray] -) -> None: +def test_predict_output_shape(strategy: str, alpha: Any, dataset: Tuple[NDArray, NDArray]) -> None: """Test predict output shape.""" mapie_reg = MapieRegressor(**STRATEGIES[strategy]) (X, y) = dataset @@ -254,9 +228,7 @@ def test_same_results_prefit_split() -> None: Test checking that if split and prefit method have exactly the same data split, then we have exactly the same results. 
""" - X, y = make_regression( - n_samples=500, n_features=10, noise=1.0, random_state=1 - ) + X, y = make_regression(n_samples=500, n_features=10, noise=1.0, random_state=1) cv = ShuffleSplit(n_splits=1, test_size=0.1, random_state=random_state) train_index, val_index = list(cv.split(X))[0] X_train, X_calib = X[train_index], X[val_index] @@ -290,12 +262,8 @@ def test_results_for_same_alpha(strategy: str) -> None: @pytest.mark.parametrize("strategy", [*STRATEGIES]) -@pytest.mark.parametrize( - "alpha", [np.array([0.05, 0.1]), [0.05, 0.1], (0.05, 0.1)] -) -def test_results_for_alpha_as_float_and_arraylike( - strategy: str, alpha: Any -) -> None: +@pytest.mark.parametrize("alpha", [np.array([0.05, 0.1]), [0.05, 0.1], (0.05, 0.1)]) +def test_results_for_alpha_as_float_and_arraylike(strategy: str, alpha: Any) -> None: """Test that output values do not depend on type of alpha.""" mapie_reg = MapieRegressor(**STRATEGIES[strategy]) mapie_reg.fit(X, y) @@ -421,9 +389,7 @@ def test_results_prefit_ignore_method() -> None: estimator = LinearRegression().fit(X, y) all_y_pis: List[NDArray] = [] for method in METHODS: - mapie_reg = MapieRegressor( - estimator=estimator, cv="prefit", method=method - ) + mapie_reg = MapieRegressor(estimator=estimator, cv="prefit", method=method) mapie_reg.fit(X, y) _, y_pis = mapie_reg.predict(X, alpha=0.1) all_y_pis.append(y_pis) @@ -459,9 +425,7 @@ def test_results_prefit() -> None: mapie_reg.fit(X_val, y_val) _, y_pis = mapie_reg.predict(X_test, alpha=0.05) width_mean = (y_pis[:, 1, 0] - y_pis[:, 0, 0]).mean() - coverage = regression_coverage_score( - y_test, y_pis[:, 0, 0], y_pis[:, 1, 0] - ) + coverage = regression_coverage_score(y_test, y_pis[:, 0, 0], y_pis[:, 1, 0]) np.testing.assert_allclose(width_mean, WIDTHS["prefit"], rtol=1e-2) np.testing.assert_allclose(coverage, COVERAGES["prefit"], rtol=1e-2) @@ -471,9 +435,7 @@ def test_not_enough_resamplings() -> None: Test that a warning is raised if at least one conformity score is nan. """ with pytest.warns(UserWarning, match=r"WARNING: at least one point of*"): - mapie_reg = MapieRegressor( - cv=Subsample(n_resamplings=1), agg_function="mean" - ) + mapie_reg = MapieRegressor(cv=Subsample(n_resamplings=1), agg_function="mean") mapie_reg.fit(X, y) @@ -481,12 +443,8 @@ def test_no_agg_fx_specified_with_subsample() -> None: """ Test that a warning is raised if at least one conformity score is nan. 
""" - with pytest.raises( - ValueError, match=r"You need to specify an aggregation*" - ): - mapie_reg = MapieRegressor( - cv=Subsample(n_resamplings=1), agg_function=None - ) + with pytest.raises(ValueError, match=r"You need to specify an aggregation*"): + mapie_reg = MapieRegressor(cv=Subsample(n_resamplings=1), agg_function=None) mapie_reg.fit(X, y) @@ -524,7 +482,7 @@ def test_aggregate_with_mask_with_invalid_agg_function() -> None: None, random_state, 0.20, - False + False, ) with pytest.raises( ValueError, @@ -577,35 +535,21 @@ def test_pipeline_compatibility() -> None: @pytest.mark.parametrize("strategy", [*STRATEGIES]) -@pytest.mark.parametrize( - "conformity_score", [AbsoluteConformityScore(), GammaConformityScore()] -) -def test_conformity_score( - strategy: str, conformity_score: ConformityScore -) -> None: +@pytest.mark.parametrize("conformity_score", [AbsoluteConformityScore(), GammaConformityScore()]) +def test_conformity_score(strategy: str, conformity_score: ConformityScore) -> None: """Test that any conformity score function with MAPIE raises no error.""" - mapie_reg = MapieRegressor( - conformity_score=conformity_score, - **STRATEGIES[strategy] - ) + mapie_reg = MapieRegressor(conformity_score=conformity_score, **STRATEGIES[strategy]) mapie_reg.fit(X, y + 1e3) mapie_reg.predict(X, alpha=0.05) -@pytest.mark.parametrize( - "conformity_score", [ResidualNormalisedScore()] -) -def test_conformity_score_with_split_strategies( - conformity_score: ConformityScore -) -> None: +@pytest.mark.parametrize("conformity_score", [ResidualNormalisedScore()]) +def test_conformity_score_with_split_strategies(conformity_score: ConformityScore) -> None: """ Test that any conformity score function that handle only split strategies with MAPIE raises no error. 
""" - mapie_reg = MapieRegressor( - conformity_score=conformity_score, - **STRATEGIES["split"] - ) + mapie_reg = MapieRegressor(conformity_score=conformity_score, **STRATEGIES["split"]) mapie_reg.fit(X, y + 1e3) mapie_reg.predict(X, alpha=0.05) @@ -615,9 +559,7 @@ def test_return_only_ypred(ensemble: bool) -> None: """Test that if return_multi_pred is False it only returns y_pred.""" mapie_reg = MapieRegressor() mapie_reg.fit(X_toy, y_toy) - output = mapie_reg.estimator_.predict( - X_toy, ensemble=ensemble, return_multi_pred=False - ) + output = mapie_reg.estimator_.predict(X_toy, ensemble=ensemble, return_multi_pred=False) assert len(output) == len(X_toy) @@ -628,7 +570,5 @@ def test_return_multi_pred(ensemble: bool) -> None: """ mapie_reg = MapieRegressor() mapie_reg.fit(X_toy, y_toy) - output = mapie_reg.estimator_.predict( - X_toy, ensemble=ensemble, return_multi_pred=True - ) + output = mapie_reg.estimator_.predict(X_toy, ensemble=ensemble, return_multi_pred=True) assert len(output) == 3 diff --git a/mapie/tests/test_subsample.py b/mapie/tests/test_subsample.py index affe8105..1b1bec90 100644 --- a/mapie/tests/test_subsample.py +++ b/mapie/tests/test_subsample.py @@ -51,9 +51,7 @@ def test_get_n_splits_BlockBootstrap() -> None: def test_split_BlockBootstrap() -> None: """Test outputs of subsamplings.""" X = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) - cv = BlockBootstrap( - n_resamplings=1, length=2, overlapping=False, random_state=1 - ) + cv = BlockBootstrap(n_resamplings=1, length=2, overlapping=False, random_state=1) trains = np.concatenate([x[0] for x in cv.split(X)]) tests = np.concatenate([x[1] for x in cv.split(X)]) trains_expected = np.array([7, 8, 9, 10, 1, 2, 3, 4, 7, 8, 1, 2]) @@ -62,9 +60,7 @@ def test_split_BlockBootstrap() -> None: np.testing.assert_equal(tests, tests_expected) X = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) - cv = BlockBootstrap( - n_resamplings=1, length=2, overlapping=True, random_state=1 - ) + cv = BlockBootstrap(n_resamplings=1, length=2, overlapping=True, random_state=1) trains = np.concatenate([x[0] for x in cv.split(X)]) tests = np.concatenate([x[1] for x in cv.split(X)]) trains_expected = np.array([5, 6, 8, 9, 9, 10, 5, 6, 0, 1, 0, 1]) diff --git a/mapie/tests/test_time_series_regression.py b/mapie/tests/test_time_series_regression.py index 110b423a..1e632492 100644 --- a/mapie/tests/test_time_series_regression.py +++ b/mapie/tests/test_time_series_regression.py @@ -18,10 +18,8 @@ random_state = 1 X_toy = np.array(range(5)).reshape(-1, 1) -y_toy = (5.0 + 2.0 * X_toy ** 1.1).flatten() -X, y = make_regression( - n_samples=500, n_features=10, noise=1.0, random_state=random_state -) +y_toy = (5.0 + 2.0 * X_toy**1.1).flatten() +X, y = make_regression(n_samples=500, n_features=10, noise=1.0, random_state=random_state) k = np.ones(shape=(5, X.shape[1])) METHODS = ["enbpi"] UPDATE_DATA = ([6], 17.5) @@ -39,9 +37,7 @@ "jackknife_enbpi_mean_ab_wopt": Params( method="enbpi", agg_function="mean", - cv=BlockBootstrap( - n_resamplings=30, n_blocks=5, random_state=random_state - ), + cv=BlockBootstrap(n_resamplings=30, n_blocks=5, random_state=random_state), ), "jackknife_enbpi_median_ab_wopt": Params( method="enbpi", @@ -55,9 +51,7 @@ "jackknife_enbpi_mean_ab": Params( method="enbpi", agg_function="mean", - cv=BlockBootstrap( - n_resamplings=30, n_blocks=5, random_state=random_state - ), + cv=BlockBootstrap(n_resamplings=30, n_blocks=5, random_state=random_state), ), "jackknife_enbpi_median_ab": Params( method="enbpi", @@ -76,7 +70,6 @@ 
"jackknife_enbpi_mean_ab": 3.76, "jackknife_enbpi_median_ab": 3.76, "prefit": 4.79, - } COVERAGES = { @@ -85,7 +78,6 @@ "jackknife_enbpi_mean_ab": 0.952, "jackknife_enbpi_median_ab": 0.946, "prefit": 0.98, - } @@ -112,9 +104,7 @@ def test_invalid_agg_function(agg_function: Any) -> None: @pytest.mark.parametrize("strategy", [*STRATEGIES]) @pytest.mark.parametrize("dataset", [(X, y), (X_toy, y_toy)]) @pytest.mark.parametrize("alpha", [0.2, [0.2, 0.4], (0.2, 0.4)]) -def test_predict_output_shape( - strategy: str, alpha: Any, dataset: Tuple[NDArray, NDArray] -) -> None: +def test_predict_output_shape(strategy: str, alpha: Any, dataset: Tuple[NDArray, NDArray]) -> None: """Test predict output shape.""" mapie_ts_reg = MapieTimeSeriesRegressor(**STRATEGIES[strategy]) (X, y) = dataset @@ -139,12 +129,8 @@ def test_results_for_same_alpha(strategy: str) -> None: @pytest.mark.parametrize("strategy", [*STRATEGIES]) -@pytest.mark.parametrize( - "alpha", [np.array([0.05, 0.1]), [0.05, 0.1], (0.05, 0.1)] -) -def test_results_for_alpha_as_float_and_arraylike( - strategy: str, alpha: Any -) -> None: +@pytest.mark.parametrize("alpha", [np.array([0.05, 0.1]), [0.05, 0.1], (0.05, 0.1)]) +def test_results_for_alpha_as_float_and_arraylike(strategy: str, alpha: Any) -> None: """Test that output values do not depend on type of alpha.""" mapie_ts_reg = MapieTimeSeriesRegressor(**STRATEGIES[strategy]) mapie_ts_reg.fit(X, y) @@ -167,8 +153,7 @@ def test_results_for_ordered_alpha(strategy: str) -> None: mapie.fit(X, y) y_pred, y_pis = mapie.predict(X, alpha=[0.05, 0.1]) assert np.all( - np.abs(y_pis[:, 1, 0] - y_pis[:, 0, 0]) - >= np.abs(y_pis[:, 1, 1] - y_pis[:, 0, 1]) + np.abs(y_pis[:, 1, 0] - y_pis[:, 0, 0]) >= np.abs(y_pis[:, 1, 1] - y_pis[:, 0, 1]) ) @@ -221,9 +206,7 @@ def test_prediction_agg_function( Test that PIs are the same but predictions differ when ensemble is True or False. """ - mapie = MapieTimeSeriesRegressor( - method=method, cv=cv, agg_function=agg_function - ) + mapie = MapieTimeSeriesRegressor(method=method, cv=cv, agg_function=agg_function) mapie.fit(X, y) y_pred_1, y_pis_1 = mapie.predict(X, ensemble=True, alpha=alpha) y_pred_2, y_pis_2 = mapie.predict(X, ensemble=False, alpha=alpha) @@ -269,19 +252,14 @@ def test_results_prefit() -> None: mapie_ts_reg.fit(X_val, y_val) _, y_pis = mapie_ts_reg.predict(X_test, alpha=0.05) width_mean = (y_pis[:, 1, 0] - y_pis[:, 0, 0]).mean() - coverage = regression_coverage_score( - y_test, y_pis[:, 0, 0], y_pis[:, 1, 0] - ) + coverage = regression_coverage_score(y_test, y_pis[:, 0, 0], y_pis[:, 1, 0]) np.testing.assert_allclose(width_mean, WIDTHS["prefit"], rtol=1e-2) np.testing.assert_allclose(coverage, COVERAGES["prefit"], rtol=1e-2) def test_not_enough_resamplings() -> None: """Test that a warning is raised if at least one residual is nan.""" - with pytest.warns( - UserWarning, - match=r"WARNING: at least one point of*" - ): + with pytest.warns(UserWarning, match=r"WARNING: at least one point of*"): mapie_ts_reg = MapieTimeSeriesRegressor( cv=BlockBootstrap(n_resamplings=1, n_blocks=1), agg_function="mean" ) @@ -293,9 +271,7 @@ def test_no_agg_fx_specified_with_subsample() -> None: Test that an error is raised if ``cv`` is ``BlockBootstrap`` but ``agg_function`` is ``None``. 
""" - with pytest.raises( - ValueError, match=r"You need to specify an aggregation*" - ): + with pytest.raises(ValueError, match=r"You need to specify an aggregation*"): mapie_ts_reg = MapieTimeSeriesRegressor( cv=BlockBootstrap(n_resamplings=1, n_blocks=1), agg_function=None, @@ -337,15 +313,11 @@ def test_MapieTimeSeriesRegressor_if_alpha_is_None() -> None: def test_MapieTimeSeriesRegressor_partial_fit_ensemble() -> None: """Test ``partial_fit``.""" mapie_ts_reg = MapieTimeSeriesRegressor(cv=-1).fit(X_toy, y_toy) - assert round(mapie_ts_reg.conformity_scores_[-1], 2) == round( - np.abs(CONFORMITY_SCORES[0]), 2 - ) + assert round(mapie_ts_reg.conformity_scores_[-1], 2) == round(np.abs(CONFORMITY_SCORES[0]), 2) mapie_ts_reg = mapie_ts_reg.partial_fit( X=np.array([UPDATE_DATA[0]]), y=np.array([UPDATE_DATA[1]]) ) - assert round(mapie_ts_reg.conformity_scores_[-1], 2) == round( - CONFORMITY_SCORES[1], 2 - ) + assert round(mapie_ts_reg.conformity_scores_[-1], 2) == round(CONFORMITY_SCORES[1], 2) def test_MapieTimeSeriesRegressor_partial_fit_too_big() -> None: @@ -359,20 +331,16 @@ def test_MapieTimeSeriesRegressor_beta_optimize_eeror() -> None: """Test ``beta_optimize`` raised error.""" mapie_ts_reg = MapieTimeSeriesRegressor(cv=-1) with pytest.raises(ValueError, match=r".*Lower and upper bounds arrays*"): - mapie_ts_reg._beta_optimize( - alpha=0.1, upper_bounds=X, lower_bounds=X_toy - ) + mapie_ts_reg._beta_optimize(alpha=0.1, upper_bounds=X, lower_bounds=X_toy) def test_deprecated_path_warning() -> None: """ Test that a warning is raised if import with deprecated path. """ - with pytest.warns( - FutureWarning, - match=r".*WARNING: Deprecated path*" - ): + with pytest.warns(FutureWarning, match=r".*WARNING: Deprecated path*"): from mapie.time_series_regression import MapieTimeSeriesRegressor + _ = MapieTimeSeriesRegressor() diff --git a/mapie/tests/test_utils.py b/mapie/tests/test_utils.py index 32517eb8..f0c3f3f2 100644 --- a/mapie/tests/test_utils.py +++ b/mapie/tests/test_utils.py @@ -12,21 +12,29 @@ from mapie._typing import ArrayLike, NDArray from mapie.regression import MapieQuantileRegressor -from mapie.utils import (check_alpha, check_alpha_and_n_samples, - check_binary_zero_one, check_cv, - check_lower_upper_bounds, check_n_features_in, - check_n_jobs, check_null_weight, check_number_bins, - check_split_strategy, check_verbose, - compute_quantiles, fit_estimator, get_binning_groups) +from mapie.utils import ( + check_alpha, + check_alpha_and_n_samples, + check_binary_zero_one, + check_cv, + check_lower_upper_bounds, + check_n_features_in, + check_n_jobs, + check_null_weight, + check_number_bins, + check_split_strategy, + check_verbose, + compute_quantiles, + fit_estimator, + get_binning_groups, +) X_toy = np.array([0, 1, 2, 3, 4, 5]).reshape(-1, 1) y_toy = np.array([5, 7, 9, 11, 13, 15]) n_features = 10 -X, y = make_regression( - n_samples=500, n_features=n_features, noise=1.0, random_state=1 -) +X, y = make_regression(n_samples=500, n_features=n_features, noise=1.0, random_state=1) ALPHAS = [ np.array([0.1]), np.array([0.05, 0.1, 0.2]), @@ -39,31 +47,47 @@ y_true = prng.randint(0, 2, 51) results_binning = { - "quantile": - [ - 0.03075388, 0.17261836, 0.33281326, 0.43939618, - 0.54867626, 0.64881987, 0.73440899, 0.77793816, - 0.89000413, 0.99610621 - ], - "uniform": - [ - 0, 0.11111111, 0.22222222, 0.33333333, 0.44444444, - 0.55555556, 0.66666667, 0.77777778, 0.88888889, 1 - ], - "array split": - [ - 0.62689056, 0.74743526, 0.87642114, 0.88321124, - 0.8916548, 0.94083846, 0.94999075, 
0.98759822, - 0.99610621, np.inf - ], + "quantile": [ + 0.03075388, + 0.17261836, + 0.33281326, + 0.43939618, + 0.54867626, + 0.64881987, + 0.73440899, + 0.77793816, + 0.89000413, + 0.99610621, + ], + "uniform": [ + 0, + 0.11111111, + 0.22222222, + 0.33333333, + 0.44444444, + 0.55555556, + 0.66666667, + 0.77777778, + 0.88888889, + 1, + ], + "array split": [ + 0.62689056, + 0.74743526, + 0.87642114, + 0.88321124, + 0.8916548, + 0.94083846, + 0.94999075, + 0.98759822, + 0.99610621, + np.inf, + ], } class DumbEstimator: - def fit( - self, - X: ArrayLike, - y: Optional[ArrayLike] = None) -> DumbEstimator: + def fit(self, X: ArrayLike, y: Optional[ArrayLike] = None) -> DumbEstimator: self.fitted_ = True return self @@ -91,20 +115,13 @@ def test_check_null_weight_with_zeros() -> None: sample_weight[:1] = 0.0 sw_out, X_out, y_out = check_null_weight(sample_weight, X_toy, y_toy) np.testing.assert_almost_equal(np.array(sw_out), np.array([1, 1, 1, 1, 1])) - np.testing.assert_almost_equal( - np.array(X_out), np.array([[1], [2], [3], [4], [5]]) - ) - np.testing.assert_almost_equal( - np.array(y_out), np.array([7, 9, 11, 13, 15]) - ) + np.testing.assert_almost_equal(np.array(X_out), np.array([[1], [2], [3], [4], [5]])) + np.testing.assert_almost_equal(np.array(y_out), np.array([7, 9, 11, 13, 15])) @pytest.mark.parametrize("estimator", [LinearRegression(), DumbEstimator()]) @pytest.mark.parametrize("sample_weight", [None, np.ones_like(y_toy)]) -def test_fit_estimator( - estimator: Any, - sample_weight: Optional[NDArray] -) -> None: +def test_fit_estimator(estimator: Any, sample_weight: Optional[NDArray]) -> None: """Test that the returned estimator is always fitted.""" estimator = fit_estimator(estimator, X_toy, y_toy, sample_weight) check_is_fitted(estimator) @@ -183,9 +200,7 @@ def test_valid_calculation_of_quantile(alpha: Any) -> None: def test_invalid_calculation_of_quantile(alpha: Any) -> None: """Test that alpha with 1/alpha > number of samples raise errors.""" n = 10 - with pytest.raises( - ValueError, match=r".*Number of samples of the score is too low.*" - ): + with pytest.raises(ValueError, match=r".*Number of samples of the score is too low.*"): check_alpha_and_n_samples(alpha, n) @@ -235,9 +250,7 @@ def test_initial_low_high_pred() -> None: def test_final_low_high_pred() -> None: """Test lower/upper predictions crossing""" - y_preds = np.array( - [[4, 3, 2], [3, 3, 3], [2, 3, 4]] - ) + y_preds = np.array([[4, 3, 2], [3, 3, 3], [2, 3, 4]]) y_pred_low = np.array([4, 3, 2]) y_pred_up = np.array([3, 3, 3]) with pytest.warns(UserWarning, match=r"WARNING: The predictions of .*"): @@ -257,9 +270,7 @@ def test_ensemble_in_predict() -> None: """Checking for ensemble defined in predict of CQR""" mapie_reg = MapieQuantileRegressor() mapie_reg.fit(X, y) - with pytest.warns( - UserWarning, match=r"WARNING: Alpha should not be spec.*" - ): + with pytest.warns(UserWarning, match=r"WARNING: Alpha should not be spec.*"): mapie_reg.predict(X, alpha=0.2) @@ -354,14 +365,8 @@ def test_quantile_prefit_non_iterable(estimator: Any) -> None: @pytest.mark.parametrize("strategy", ["quantile", "uniform", "array split"]) def test_binning_group_strategies(strategy: str) -> None: """Test that different strategies have the correct outputs.""" - bins_ = get_binning_groups( - y_score, num_bins=10, strategy=strategy - ) - np.testing.assert_allclose( - results_binning[strategy], - bins_, - rtol=1e-05 - ) + bins_ = get_binning_groups(y_score, num_bins=10, strategy=strategy) + 
np.testing.assert_allclose(results_binning[strategy], bins_, rtol=1e-05) def test_wrong_split_strategy() -> None: @@ -379,19 +384,13 @@ def test_split_strategy_None() -> None: @pytest.mark.parametrize("bins", ["random", LinearRegression(), 0.5]) def test_num_bins_not_int(bins: int) -> None: """Test input for bins is an integer.""" - with pytest.raises( - ValueError, - match=r"Please provide a bin number as an int*" - ): + with pytest.raises(ValueError, match=r"Please provide a bin number as an int*"): check_number_bins(num_bins=bins) def test_num_bins_below_zero() -> None: """Test input for bins is positive integer.""" - with pytest.raises( - ValueError, - match=r"Please provide a bin number greater*" - ): + with pytest.raises(ValueError, match=r"Please provide a bin number greater*"): check_number_bins(num_bins=-1) @@ -399,10 +398,7 @@ def test_binary_target() -> None: """ Test that input of binary will provide an error message for non binary. """ - with pytest.raises( - ValueError, - match=r"Please provide y_true as a bina*" - ): + with pytest.raises(ValueError, match=r"Please provide y_true as a bina*"): check_binary_zero_one(np.array([0, 5, 4])) diff --git a/mapie/utils.py b/mapie/utils.py index 00e57782..1906df72 100644 --- a/mapie/utils.py +++ b/mapie/utils.py @@ -5,14 +5,23 @@ import numpy as np from sklearn.base import ClassifierMixin, RegressorMixin from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import (BaseCrossValidator, KFold, LeaveOneOut, - BaseShuffleSplit, ShuffleSplit, - train_test_split) +from sklearn.model_selection import ( + BaseCrossValidator, + KFold, + LeaveOneOut, + BaseShuffleSplit, + ShuffleSplit, + train_test_split, +) from sklearn.pipeline import Pipeline from sklearn.utils import _safe_indexing from sklearn.utils.multiclass import type_of_target -from sklearn.utils.validation import (_check_sample_weight, _num_features, - check_is_fitted, column_or_1d) +from sklearn.utils.validation import ( + _check_sample_weight, + _num_features, + check_is_fitted, + column_or_1d, +) from ._compatibility import np_quantile from ._typing import ArrayLike, NDArray @@ -175,21 +184,14 @@ def check_cv( random_seeds = cast(list, np.random.get_state())[1] random_state = np.random.choice(random_seeds) if cv is None: - return KFold( - n_splits=5, shuffle=True, random_state=random_state - ) + return KFold(n_splits=5, shuffle=True, random_state=random_state) elif isinstance(cv, int): if cv == -1: return LeaveOneOut() elif cv >= 2: - return KFold( - n_splits=cv, shuffle=True, random_state=random_state - ) + return KFold(n_splits=cv, shuffle=True, random_state=random_state) else: - raise ValueError( - "Invalid cv argument. " - "Allowed integer values are -1 or int >= 2." - ) + raise ValueError("Invalid cv argument. " "Allowed integer values are -1 or int >= 2.") elif isinstance(cv, BaseCrossValidator): return cv elif isinstance(cv, BaseShuffleSplit): @@ -197,9 +199,7 @@ def check_cv( elif cv == "prefit": return cv elif cv == "split": - return ShuffleSplit( - n_splits=1, test_size=test_size, random_state=random_state - ) + return ShuffleSplit(n_splits=1, test_size=test_size, random_state=random_state) else: raise ValueError( "Invalid cv argument. " @@ -208,9 +208,7 @@ def check_cv( ) -def check_alpha( - alpha: Optional[Union[float, Iterable[float]]] = None -) -> Optional[ArrayLike]: +def check_alpha(alpha: Optional[Union[float, Iterable[float]]] = None) -> Optional[ArrayLike]: """ Check alpha and prepare it as a ArrayLike. 
@@ -246,18 +244,11 @@ def check_alpha( elif isinstance(alpha, Iterable): alpha_np = np.array(alpha) else: - raise ValueError( - "Invalid alpha. Allowed values are float or Iterable." - ) + raise ValueError("Invalid alpha. Allowed values are float or Iterable.") if len(alpha_np.shape) != 1: - raise ValueError( - "Invalid alpha." - "Please provide a one-dimensional list of values." - ) + raise ValueError("Invalid alpha." "Please provide a one-dimensional list of values.") if alpha_np.dtype.type not in [np.float64, np.float32]: - raise ValueError( - "Invalid alpha. Allowed values are Iterable of floats." - ) + raise ValueError("Invalid alpha. Allowed values are Iterable of floats.") if np.any(np.logical_or(alpha_np <= 0, alpha_np >= 1)): raise ValueError("Invalid alpha. Allowed values are between 0 and 1.") return alpha_np @@ -316,10 +307,7 @@ def check_n_features_in( n_features_in = _num_features(X) if cv == "prefit" and hasattr(estimator, "n_features_in_"): if cast(Any, estimator).n_features_in_ != n_features_in: - raise ValueError( - "Invalid mismatch between ", - "X.shape and estimator.n_features_in_." - ) + raise ValueError("Invalid mismatch between ", "X.shape and estimator.n_features_in_.") return n_features_in @@ -359,6 +347,7 @@ def check_alpha_and_n_samples( """ if isinstance(alphas, float): alphas = np.array([alphas]) + alphas = cast(list, alphas) for alpha in alphas: if n < 1 / alpha or n < 1 / (1 - alpha): raise ValueError( @@ -451,15 +440,12 @@ def check_nan_in_aposteriori_prediction(X: ArrayLike) -> None: """ if np.any(np.all(np.isnan(X), axis=1), axis=0): warnings.warn( - "WARNING: at least one point of training set " - + "belongs to every resamplings.\n" + "WARNING: at least one point of training set " + "belongs to every resamplings.\n" "Increase the number of resamplings" ) -def check_lower_upper_bounds( - y_preds: NDArray, y_pred_low: NDArray, y_pred_up: NDArray -) -> None: +def check_lower_upper_bounds(y_preds: NDArray, y_pred_low: NDArray, y_pred_up: NDArray) -> None: """ Check if the lower or upper bounds are consistent. If check for MapieQuantileRegressor's outputs, then also check @@ -567,8 +553,7 @@ def check_conformity_score( return conformity_score else: raise ValueError( - "Invalid conformity_score argument.\n" - "Must be None or a ConformityScore instance." + "Invalid conformity_score argument.\n" "Must be None or a ConformityScore instance." ) @@ -611,9 +596,7 @@ def check_defined_variables_predict_cqr( WARNING: ensemble is not utilized in ``MapieQuantileRegressor``. """ if ensemble is True: - warnings.warn( - "WARNING: ensemble is not utilized in ``MapieQuantileRegressor``." - ) + warnings.warn("WARNING: ensemble is not utilized in ``MapieQuantileRegressor``.") if alpha is not None: warnings.warn( "WARNING: Alpha should not be specified in the prediction method\n" @@ -621,9 +604,7 @@ def check_defined_variables_predict_cqr( ) -def check_estimator_fit_predict( - estimator: Union[RegressorMixin, ClassifierMixin] -) -> None: +def check_estimator_fit_predict(estimator: Union[RegressorMixin, ClassifierMixin]) -> None: """ Check that the estimator has a fit and precict method. @@ -639,8 +620,7 @@ def check_estimator_fit_predict( """ if not (hasattr(estimator, "fit") and hasattr(estimator, "predict")): raise ValueError( - "Invalid estimator. " - "Please provide a regressor with fit and predict methods." + "Invalid estimator. " "Please provide a regressor with fit and predict methods." 
) @@ -720,10 +700,7 @@ def get_calib_set( random_state: Optional[Union[int, np.random.RandomState]] = None, shuffle: Optional[bool] = True, stratify: Optional[ArrayLike] = None, -) -> Tuple[ - ArrayLike, ArrayLike, ArrayLike, ArrayLike, - Optional[NDArray], Optional[NDArray] -]: +) -> Tuple[ArrayLike, ArrayLike, ArrayLike, ArrayLike, Optional[NDArray], Optional[NDArray]]: """ Split the dataset into training and calibration sets. @@ -751,41 +728,36 @@ def get_calib_set( sample_weight_calib """ if sample_weight is None: - ( - X_train, X_calib, y_train, y_calib - ) = train_test_split( - X, - y, - test_size=calib_size, - random_state=random_state, - shuffle=shuffle, - stratify=stratify + (X_train, X_calib, y_train, y_calib) = train_test_split( + X, + y, + test_size=calib_size, + random_state=random_state, + shuffle=shuffle, + stratify=stratify, ) sample_weight_train = sample_weight sample_weight_calib = None else: ( - X_train, - X_calib, - y_train, - y_calib, - sample_weight_train, - sample_weight_calib, + X_train, + X_calib, + y_train, + y_calib, + sample_weight_train, + sample_weight_calib, ) = train_test_split( - X, - y, - sample_weight, - test_size=calib_size, - random_state=random_state, - shuffle=shuffle, - stratify=stratify + X, + y, + sample_weight, + test_size=calib_size, + random_state=random_state, + shuffle=shuffle, + stratify=stratify, ) X_train, X_calib = cast(ArrayLike, X_train), cast(ArrayLike, X_calib) y_train, y_calib = cast(ArrayLike, y_train), cast(ArrayLike, y_calib) - return ( - X_train, y_train, X_calib, y_calib, - sample_weight_train, sample_weight_calib - ) + return (X_train, y_train, X_calib, y_calib, sample_weight_train, sample_weight_calib) def check_estimator_classification( @@ -876,13 +848,7 @@ def get_binning_groups( bins = np.linspace(0.0, 1.0, num_bins) else: bin_groups = np.array_split(y_score, num_bins) - bins = np.sort(np.array( - [ - bin_group.max() for bin_group in bin_groups[:-1] - ] - + [np.inf] - ) - ) + bins = np.sort(np.array([bin_group.max() for bin_group in bin_groups[:-1]] + [np.inf])) return bins @@ -936,9 +902,7 @@ def calc_bins( return bins, bin_accs, bin_confs, bin_sizes # type: ignore -def check_split_strategy( - strategy: Optional[str] -) -> str: +def check_split_strategy(strategy: Optional[str]) -> str: """ Checks that the split strategy provided is valid and defults None split strategy to "uniform". @@ -960,15 +924,11 @@ def check_split_strategy( if strategy is None: strategy = "uniform" if strategy not in SPLIT_STRATEGIES: - raise ValueError( - "Please provide a valid splitting strategy." - ) + raise ValueError("Please provide a valid splitting strategy.") return strategy -def check_number_bins( - num_bins: int -) -> int: +def check_number_bins(num_bins: int) -> int: """ Checks that the bin specified is a number. @@ -987,9 +947,7 @@ def check_number_bins( When num_bins is a negative number is raises an error. """ if isinstance(num_bins, int) is False: - raise ValueError( - "Please provide a bin number as an integer." - ) + raise ValueError("Please provide a bin number as an integer.") elif num_bins < 1: raise ValueError( """ @@ -1001,9 +959,7 @@ def check_number_bins( return num_bins -def check_binary_zero_one( - y_true: ArrayLike -) -> NDArray: +def check_binary_zero_one(y_true: ArrayLike) -> NDArray: """ Checks if the array is binary and changes a non binary array to a zero, one array. 
@@ -1026,8 +982,7 @@ def check_binary_zero_one( """ y_true = cast(NDArray, column_or_1d(y_true)) if type_of_target(y_true) == "binary": - if ((np.unique(y_true) != np.array([0, 1])).any() and - len(np.unique(y_true)) == 2): + if (np.unique(y_true) != np.array([0, 1])).any() and len(np.unique(y_true)) == 2: idx_min = np.where(y_true == np.min(y_true))[0] y_true[idx_min] = 0 idx_max = np.where(y_true == np.max(y_true))[0] @@ -1036,15 +991,11 @@ def check_binary_zero_one( else: return y_true else: - raise ValueError( - "Please provide y_true as a binary array." - ) + raise ValueError("Please provide y_true as a binary array.") def fix_number_of_classes( - n_classes_: int, - n_classes_training: NDArray, - y_proba: NDArray + n_classes_: int, n_classes_training: NDArray, y_proba: NDArray ) -> NDArray: """ Fix shape of y_proba of validation set if number of classes @@ -1063,23 +1014,13 @@ def fix_number_of_classes( NDArray Probabilities with the right number of classes. """ - y_pred_full = np.zeros( - shape=(len(y_proba), n_classes_) - ) + y_pred_full = np.zeros(shape=(len(y_proba), n_classes_)) y_index = np.tile(n_classes_training, (len(y_proba), 1)) - np.put_along_axis( - y_pred_full, - y_index, - y_proba, - axis=1 - ) + np.put_along_axis(y_pred_full, y_index, y_proba, axis=1) return y_pred_full -def check_array_shape_classification( - y_true: NDArray, - y_pred_set: NDArray -) -> NDArray: +def check_array_shape_classification(y_true: NDArray, y_pred_set: NDArray) -> NDArray: """ Fix shape of y_pred_set (to 3d array of shape (n_obs, n_class, n_alpha)). @@ -1117,10 +1058,7 @@ def check_array_shape_classification( return y_pred_set -def check_array_shape_regression( - y_true: NDArray, - y_intervals: NDArray -) -> NDArray: +def check_array_shape_regression(y_true: NDArray, y_intervals: NDArray) -> NDArray: """ Fix shape of y_intervals (to 3d array of shape (n_obs, 2, n_alpha)). @@ -1145,9 +1083,7 @@ def check_array_shape_regression( """ if len(y_intervals.shape) != 3: if len(y_intervals.shape) != 2: - raise ValueError( - "y_intervals should be a 3D array of shape (n_obs, 2, n_alpha)" - ) + raise ValueError("y_intervals should be a 3D array of shape (n_obs, 2, n_alpha)") else: y_intervals = np.expand_dims(y_intervals, axis=2) if y_true.shape[0] != y_intervals.shape[0]: diff --git a/notebooks/Makefile b/notebooks/Makefile index d85460c3..d6af603f 100644 --- a/notebooks/Makefile +++ b/notebooks/Makefile @@ -17,4 +17,4 @@ convert2rst: cp -r $(dir)/$(file)_files ../doc/ rm -rf $(dir)/.ipynb_checkpoints rm -rf $(dir)/${file}_files - rm -rf $(dir)/${file}.rst \ No newline at end of file + rm -rf $(dir)/${file}.rst diff --git a/notebooks/classification/Cifar10.md b/notebooks/classification/Cifar10.md index 681012e3..08d977d7 100755 --- a/notebooks/classification/Cifar10.md +++ b/notebooks/classification/Cifar10.md @@ -25,7 +25,7 @@ The goal of this notebook is to present how to use :class:`mapie.classification. > - **Cifar10 dataset** : 10 classes (horse, dog, cat, frog, deer, bird, airplane, truck, ship, automobile) -> - Use :class:`mapie.classification.MapieClassifier` to compare the prediction sets estimated by several conformal methods on the Cifar10 dataset. +> - Use :class:`mapie.classification.MapieClassifier` to compare the prediction sets estimated by several conformal methods on the Cifar10 dataset. 
> - Train a small CNN to predict the image class @@ -101,38 +101,38 @@ def train_valid_calib_split( ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """ Create calib and valid datasets from the train dataset. - + Parameters ---------- X: np.ndarray of shape (n_samples, width, height, n_channels) Images of the dataset. - + y: np.ndarray of shape (n_samples, 1): Label of each image. - + calib_size: float Percentage of the dataset X to use as calibration set. - + val_size: float Percentage of the dataset X (minus the calibration set) to use as validation set. - + random_state: int Random state to use to split the dataset. - + By default 42. - + Returns ------- Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray] - of shapes: + of shapes: (n_samples * (1 - calib_size) * (1 - val_size), width, height, n_channels), (n_samples * calib_size, width, height, n_channels), (n_samples * (1 - calib_size) * val_size, width, height, n_channels), (n_samples * (1 - calib_size) * (1 - val_size), 1), (n_samples * calib_size, 1), (n_samples * (1 - calib_size) * val_size, 1). - + """ X_train, X_calib, y_train, y_calib = train_test_split( X, y, @@ -158,8 +158,8 @@ def load_data() -> Tuple[ """ Load cifar10 Dataset and return train, valid, calib, test datasets and the names of the labels - - + + Returns ------- Tuple[ @@ -209,27 +209,27 @@ def load_data() -> Tuple[ def inspect_images( X: np.ndarray, y: np.ndarray, - num_images: int, + num_images: int, label_names: List ) -> None: """ Load a sample of the images to check that images are well loaded. - + Parameters ---------- X: np.ndarray of shape (n_samples, width, height, n_channels) Set of images from which the sample will be taken. - + y: np.ndarray of shape (n_samples, 1) Labels of the iamges of X. - + num_images: int Number of images to plot. - + label_names: List Names of the different labels - + """ _, ax = plt.subplots( @@ -250,17 +250,17 @@ def inspect_images( ```python train_set, val_set, calib_set, test_set, label_names = load_data() -(X_train, y_train, y_train_cat) = train_set -(X_val, y_val, y_val_cat) = val_set -(X_calib, y_calib, y_calib_cat) = calib_set -(X_test, y_test, y_test_cat) = test_set +(X_train, y_train, y_train_cat) = train_set +(X_val, y_val, y_val_cat) = val_set +(X_calib, y_calib, y_calib_cat) = calib_set +(X_test, y_test, y_test_cat) = test_set inspect_images(X=X_train, y=y_train, num_images=8, label_names=label_names) ``` ## 2. Definition and training of the the neural network -We define a simple convolutional neural network with the following architecture : +We define a simple convolutional neural network with the following architecture : > - 2 blocks of Convolution/Maxpooling > - Flatten the images @@ -277,21 +277,21 @@ def get_model( ) -> Sequential: """ Compile CNN model. - + Parameters ---------- input_shape: Tuple Size of th input images. - + loss: tfk.losses Loss to use to train the model. - + optimizer: tfk.optimizer Optimizer to use to train the model. - + metrics: List[str] Metrics to use evaluate model training. - + Returns ------- Sequential @@ -323,13 +323,13 @@ class TensorflowToMapie(): Class that aimes to make compatible a tensorflow model with MAPIE. To do so, this class create fit, predict, predict_proba and _sklearn_is_fitted_ attributes to the model. - + """ def __init__(self) -> None: self.pred_proba = None self.trained_ = False - + def fit( self, model: Sequential, @@ -338,26 +338,26 @@ class TensorflowToMapie(): ) -> None: """ Train the keras model. 
- + Parameters ---------- model: Sequential Model to train. - + X_train: np.ndarray of shape (n_sample_train, width, height, n_channels) Training images. - + y_train: np.ndarray of shape (n_samples_train, n_labels) Training labels. - + X_val: np.ndarray of shape (n_sample_val, width, height, n_channels) Validation images. - + y_val: np.ndarray of shape (n_samples_val, n_labels) Validation labels. - + """ - + early_stopping_monitor = EarlyStopping( monitor='val_loss', min_delta=0, @@ -368,12 +368,12 @@ class TensorflowToMapie(): restore_best_weights=True ) model.fit( - X_train, y_train, - batch_size=64, - validation_data=(X_val, y_val), + X_train, y_train, + batch_size=64, + validation_data=(X_val, y_val), epochs=20, callbacks=[early_stopping_monitor] ) - + self.model = model self.trained_ = True self.classes_ = np.arange(model.layers[-1].units) @@ -381,27 +381,27 @@ class TensorflowToMapie(): def predict_proba(self, X: np.ndarray) -> np.ndarray: """ Returns the predicted probabilities of the images in X. - + Paramters: X: np.ndarray of shape (n_sample, width, height, n_channels) Images to predict. - + Returns: np.ndarray of shape (n_samples, n_labels) """ preds = self.model.predict(X) - + return preds def predict(self, X: np.ndarray) -> np.ndarray: """ Give the label with the maximum softmax for each image. - + Parameters --------- X: np.ndarray of shape (n_sample, width, height, n_channels) Images to predict - + Returns: -------- np.ndarray of shape (n_samples, 1) @@ -419,9 +419,9 @@ class TensorflowToMapie(): ```python tags=[] model = get_model( - input_shape=(32, 32, 3), - loss=CategoricalCrossentropy(), - optimizer=Adam(), + input_shape=(32, 32, 3), + loss=CategoricalCrossentropy(), + optimizer=Adam(), metrics=['accuracy'] ) ``` @@ -441,7 +441,7 @@ y_pred = cirfar10_model.predict(X_test) ## 4. Prediction of the prediction sets -We will now estimate the prediction sets with the five conformal methods implemented in :class:`mapie.classification.MapieClassifier` for a range of confidence levels between 0 and 1. +We will now estimate the prediction sets with the five conformal methods implemented in :class:`mapie.classification.MapieClassifier` for a range of confidence levels between 0 and 1. ```python method_params = { @@ -459,7 +459,7 @@ y_preds, y_pss = {}, {} alphas = np.arange(0.01, 1, 0.01) for name, (method, include_last_label) in method_params.items(): - mapie = MapieClassifier(estimator=cirfar10_model, method=method, cv="prefit", random_state=42) + mapie = MapieClassifier(estimator=cirfar10_model, method=method, cv="prefit", random_state=42) mapie.fit(X_calib, y_calib) y_preds[name], y_pss[name] = mapie.predict(X_test, alpha=alphas, include_last_label=include_last_label) ``` @@ -470,11 +470,11 @@ Let's now estimate the number of null prediction sets, marginal coverages, and a def count_null_set(y: np.ndarray) -> int: """ Count the number of empty prediction sets. - + Parameters ---------- y: np.ndarray of shape (n_sample, ) - + Returns ------- int @@ -512,7 +512,7 @@ width_90 = {method: width[9] for method, width in sizes.items()} y_ps_90 = {method: y_ps[:, :, 9] for method, y_ps in y_pss.items()} ``` -Let's now look at the marginal coverages, number of null prediction sets, and the averaged size of prediction sets for a confidence level of 90 \%. +Let's now look at the marginal coverages, number of null prediction sets, and the averaged size of prediction sets for a confidence level of 90 \%. 
```python summary_df = pd.concat( @@ -540,14 +540,14 @@ def prepare_plot(y_methods: Dict[str, Tuple], n_images: int) -> np.ndarray: """ Prepare the number and the disposition of the plots according to the number of images. - + Paramters: y_methods: Dict[str, Tuple] Methods we want to compare. - + n_images: int Number of images to plot. - + Returns ------- np.ndarray @@ -559,7 +559,7 @@ def prepare_plot(y_methods: Dict[str, Tuple], n_images: int) -> np.ndarray: f, ax = plt.subplots(ncol, nrow, figsize=(s*nrow, s*ncol)) f.tight_layout(pad=SPACE_IN_SUBPLOTS) rows = [i for i in y_methods.keys()] - + for x, row in zip(ax[:,0], rows): x.set_ylabel(row, rotation=90, size='large') @@ -571,21 +571,21 @@ def prepare_plot(y_methods: Dict[str, Tuple], n_images: int) -> np.ndarray: def get_position(y_set: List, label: str, count: int, count_true: int) -> float: """ Return the position of each label according to the number of labels to plot. - + Paramters --------- y_set: List Set of predicted labels for one image. - + label: str Indice of the true label. - + count: int Index of the label. - + count_true: int Total number of labels in the prediction set. - + Returns ------- float @@ -605,34 +605,34 @@ def add_text( ) -> None: """ Add the text to the corresponding image. - + Parameters ---------- ax: np.ndarray Matrix of the images to plot. - + indices: Tuple Tuple indicating the indices of the image to put the text on. - + position: float Position of the text on the image. - + label_name: str Name of the label to plot. - + proba: float Proba associated to this label. - + color: str Color of the text. - + missing: bool Whether or not the true label is missing in the prediction set. - + By default False. - + """ if not missing : text = f"{label_name} : {proba:.4f}" @@ -642,8 +642,8 @@ def add_text( ax[i, j].text( 15, position, - text, - ha="center", va="top", + text, + ha="center", va="top", color=color, font="courier new" ) @@ -662,28 +662,28 @@ def plot_prediction_sets( """ Plot random images with their associated prediction set for all the required methods. - + Parameters ---------- X: np.ndarray of shape (n_sample, width, height, n_channels) Array containing images. - + y: np.ndarray of shape (n_samples, ) Labels of the images. - + y_pred_proba: np.ndarray of shape (n_samples, n_labels) Softmax output of the model. - + y_methods: Dict[str, np.ndarray] Outputs of the MapieClassifier with the different choosen methods. - + n_images: int Number of images to plot - + random_state: Union[int, None] Random state to use to choose the images. - + By default None. """ random.seed(random_state) @@ -728,7 +728,7 @@ plot_prediction_sets(X_test, y_test, y_pred_proba, y_ps_90, 5, label_names) ## 6. Calibration of the methods -In this section, we plot the number of null sets, the marginal coverages, and the prediction set sizes as function of the target coverage level for all conformal methods. +In this section, we plot the number of null sets, the marginal coverages, and the prediction set sizes as function of the target coverage level for all conformal methods. ```python vars_y = [nulls, coverages, sizes] @@ -775,15 +775,15 @@ def get_class_coverage( Compute the coverage for each class. As MAPIE is looking for a global coverage of 1-alpha, it is important to check that their is not major coverage difference between classes. - + Parameters ---------- y_test: np.ndarray of shape (n_samples,) Labels of the predictions. - + y_method: Dict[str, np.ndarray] Prediction sets for each method. 
- + label_names: List[str] Names of the labels. """ @@ -799,7 +799,7 @@ def get_class_coverage( recap[method].append(score_coverage) recap_df = pd.DataFrame(recap, index = label_names) return recap_df - + ``` ```python @@ -822,15 +822,15 @@ def create_confusion_matrix(y_ps: np.ndarray, y_true: np.ndarray) -> np.ndarray: Create a confusion matrix to visualize, for each class, which classes are which are the most present classes in the prediction sets. - + Parameters ---------- y_ps: np.ndarray of shape (n_samples, n_labels) Prediction sets of a specific method. - + y_true: np.ndarray of shape (n_samples, ) Labels of the sample - + Returns ------- np.ndarray of shape (n_labels, n_labels) @@ -839,27 +839,27 @@ def create_confusion_matrix(y_ps: np.ndarray, y_true: np.ndarray) -> np.ndarray: confusion_matrix = np.zeros((number_of_classes, number_of_classes)) for i, ps in enumerate(y_ps): confusion_matrix[y_true[i]] += ps - + return confusion_matrix - + ``` ```python def reorder_labels(ordered_labels: List, labels: List, cm: np.ndarray) -> np.ndarray: """ Used to order the labels in the confusion matrix - + Parameters ---------- ordered_labels: List Order you want to have in your confusion matrix - + labels: List Initial order of the confusion matrix - + cm: np.ndarray of shape (n_labels, n_labels) Original confusion matrix - + Returns ------- np.ndarray of shape (n_labels, n_labels) @@ -868,7 +868,7 @@ def reorder_labels(ordered_labels: List, labels: List, cm: np.ndarray) -> np.nda index_order = [labels.index(label) for label in ordered_labels] for i, label in enumerate(ordered_labels): old_index = labels.index(label) - + cm_ordered[i] = cm[old_index, index_order] return cm_ordered ``` @@ -877,15 +877,15 @@ def reorder_labels(ordered_labels: List, labels: List, cm: np.ndarray) -> np.nda def plot_confusion_matrix(method: str, y_ps: Dict[str, np.ndarray], label_names: List) -> None: """ Plot the confusion matrix for a specific method. - + Parameters ---------- method: str Name of the method to plot. - + y_ps: Dict[str, np.ndarray] Prediction sets for each of the fitted method - + label_names: List Name of the labels """ diff --git a/notebooks/classification/tutorial_classification.md b/notebooks/classification/tutorial_classification.md index 2e9e099c..45a22633 100644 --- a/notebooks/classification/tutorial_classification.md +++ b/notebooks/classification/tutorial_classification.md @@ -22,7 +22,7 @@ Throughout this tutorial, we will answer the following questions: - How does the number of classes in the prediction sets vary according to the significance level ? -- Is the chosen conformal method well calibrated ? +- Is the chosen conformal method well calibrated ? - What are the pros and cons of the conformal methods included in MAPIE ? @@ -39,11 +39,11 @@ We estimate the prediction sets as follows : * First we generate a dataset with train, calibration and test, the model is fitted on the training set. * We set the conformal score $S_i = \hat{f}(X_{i})_{y_i}$ the softmax output of the true class for each sample in the calibration set. * Then we define $\hat{q}$ as being the $(n + 1) (\alpha) / n$ previous quantile of $S_{1}, ..., S_{n}$ -(this is essentially the quantile $\alpha$, but with a small sample correction). +(this is essentially the quantile $\alpha$, but with a small sample correction). 
* Finally, for a new test data point (where $X_{n + 1}$ is known but $Y_{n + 1}$ is not), create a prediction set $C(X_{n+1}) = \{y: \hat{f}(X_{n+1})_{y} > \hat{q}\}$ which includes all the classes with a sufficiently high softmax output. -We use a two-dimensional toy dataset with three labels. The distribution of the data is a bivariate normal with diagonal covariance matrices for each label. +We use a two-dimensional toy dataset with three labels. The distribution of the data is a bivariate normal with diagonal covariance matrices for each label. ```python import numpy as np @@ -90,7 +90,7 @@ plt.show() We fit our training data with a Gaussian Naive Base estimator. And then we apply MAPIE in the calibration data with the method ``score`` to the estimator indicating that it has already been fitted with `cv="prefit"`. We then estimate the prediction sets with differents alpha values with a -``fit`` and ``predict`` process. +``fit`` and ``predict`` process. ```python from sklearn.naive_bayes import GaussianNB @@ -222,7 +222,7 @@ plot_coverages_widths(alpha2, coverages_score, widths_score, "lac") ## 2. Conformal Prediction method using the cumulative softmax score -We saw in the previous section that the "lac" method is well calibrated by providing accurate coverage levels. However, it tends to give null prediction sets for uncertain regions, especially when the $\alpha$ value is high. MAPIE includes another method, called Adaptive Prediction Set (APS), whose conformity score is the cumulated score of the softmax output until the true label is reached (see the theoretical description for more details). We will see in this Section that this method no longer estimates null prediction sets but by giving slightly bigger prediction sets. +We saw in the previous section that the "lac" method is well calibrated by providing accurate coverage levels. However, it tends to give null prediction sets for uncertain regions, especially when the $\alpha$ value is high. MAPIE includes another method, called Adaptive Prediction Set (APS), whose conformity score is the cumulated score of the softmax output until the true label is reached (see the theoretical description for more details). We will see in this Section that this method no longer estimates null prediction sets but by giving slightly bigger prediction sets. Let's visualize the prediction sets obtained with the APS method on the test set after fitting MAPIE on the calibration set. @@ -238,7 +238,7 @@ y_pred_aps, y_ps_aps = mapie_aps.predict(X_test_mesh, alpha=alpha, include_last_ plot_results(alpha, X_test_mesh, y_pred_aps, y_ps_aps) ``` -One can notice that the uncertain regions are emphasized by wider boundaries, but without null prediction sets with respect to the first "lac" method. +One can notice that the uncertain regions are emphasized by wider boundaries, but without null prediction sets with respect to the first "lac" method. ```python _, y_ps_aps2 = mapie_aps.predict(X_test, alpha=alpha2, include_last_label="randomized") @@ -256,4 +256,4 @@ widths_aps = [ plot_coverages_widths(alpha2, coverages_aps, widths_aps, "lac") ``` -This method also gives accurate calibration plots, meaning that the effective coverage level is always very close to the target coverage, sometimes at the expense of slightly bigger prediction sets. +This method also gives accurate calibration plots, meaning that the effective coverage level is always very close to the target coverage, sometimes at the expense of slightly bigger prediction sets. 
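The two notebook hunks above walk through the LAC ("score") recipe in prose: a conformity score equal to the softmax probability of the true class, a small-sample-corrected quantile, and prediction sets built by thresholding. As a hedged aside (not part of this patch; every name such as `clf`, `X_cal` or `q_hat` is purely illustrative), a minimal end-to-end sketch of that recipe with a plain scikit-learn classifier could look like this:

```python
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Toy three-class data, split into fit / calibration / test sets.
X, y = make_blobs(n_samples=1500, centers=3, random_state=42)
X_fit, X_rest, y_fit, y_rest = train_test_split(X, y, test_size=0.5, random_state=42)
X_cal, X_test, y_cal, y_test = train_test_split(X_rest, y_rest, test_size=0.5, random_state=42)

clf = GaussianNB().fit(X_fit, y_fit)

alpha = 0.2
# Conformity score on the calibration set: predicted probability of the true class.
scores_cal = clf.predict_proba(X_cal)[np.arange(len(y_cal)), y_cal]

# Small-sample-corrected lower quantile of the calibration scores.
n = len(scores_cal)
k = int(np.floor((n + 1) * alpha))
q_hat = np.sort(scores_cal)[max(k - 1, 0)]

# Prediction sets: keep every class whose probability clears the threshold,
# mirroring the set definition C(X) = {y: f_hat(X)_y > q_hat} quoted above.
prediction_sets = clf.predict_proba(X_test) > q_hat

coverage = prediction_sets[np.arange(len(y_test)), y_test].mean()
avg_size = prediction_sets.sum(axis=1).mean()
print(f"coverage ~ {coverage:.2f} (target {1 - alpha:.2f}), mean set size ~ {avg_size:.2f}")
```

This mirrors, by hand, what the tutorial obtains through `MapieClassifier` with `cv="prefit"` on the calibration data, without the null-set and adaptivity refinements that the APS method adds.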
diff --git a/notebooks/regression/exoplanets.md b/notebooks/regression/exoplanets.md index f7175852..a9f0e97a 100755 --- a/notebooks/regression/exoplanets.md +++ b/notebooks/regression/exoplanets.md @@ -247,7 +247,7 @@ def plot_predictionintervals( suptitle: str, ) -> None: fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20, 6)) - + ax1.errorbar( x=y_train, y=y_train_pred, @@ -273,7 +273,7 @@ def plot_predictionintervals( ax1.set_xlabel("True values", fontsize=12) ax1.set_ylabel("Predicted values", fontsize=12) ax1.legend() - + ax2.scatter( x=y_train, y=y_train_pred_high - y_train_pred_low, alpha=0.8, label="train", marker="." ) diff --git a/notebooks/regression/ts-changepoint.md b/notebooks/regression/ts-changepoint.md index 3837c3d3..9015b566 100644 --- a/notebooks/regression/ts-changepoint.md +++ b/notebooks/regression/ts-changepoint.md @@ -15,7 +15,7 @@ in which the training set is prior to the validation set. The best model is then feeded into `mapie.time_series_regression.MapieTimeSeriesRegressor` to estimate the associated prediction intervals. We compare four approaches: with or without -``partial_fit`` called at every step. +``partial_fit`` called at every step. ```python @@ -70,7 +70,7 @@ for hour in range(1, n_lags): num_test_steps = 24 * 7 demand_train = demand_df.iloc[:-num_test_steps, :].copy() demand_test = demand_df.iloc[-num_test_steps:, :].copy() -features = ["Weekofyear", "Weekday", "Hour", "Temperature"] +features = ["Weekofyear", "Weekday", "Hour", "Temperature"] features += [f"Lag_{hour}" for hour in range(1, n_lags)] X_train = demand_train.loc[ @@ -97,9 +97,9 @@ plt.ylabel("Hourly demand (GW)") - + ![png](output_9_1.png) - + ## 3. Optimize the base estimator @@ -253,12 +253,12 @@ plot_forecast(y_train, y_test, y_preds, y_pis, coverages, widths) ![png](output_21_0.png) - + ## VI. Forecast on test dataset with change point -We will now see how MAPIE adapts its prediction intervals when a brutal changepoint arises in the test set. To simulate this, we will artificially decrease the electricity demand by 2 GW in the test set, aiming at simulating an effect, such as blackout or lockdown due to a pandemic, that was not taken into account by the model during its training. +We will now see how MAPIE adapts its prediction intervals when a brutal changepoint arises in the test set. To simulate this, we will artificially decrease the electricity demand by 2 GW in the test set, aiming at simulating an effect, such as blackout or lockdown due to a pandemic, that was not taken into account by the model during its training. 
### Corrupt the dataset @@ -300,9 +300,9 @@ plt.plot(y_test) - + ![png](output_27_1.png) - + ### Prediction intervals without partial fit @@ -349,7 +349,7 @@ for step in range(gap, len(X_test), gap): ) = mapie_enbpi.predict( X_test.iloc[step:(step + gap), :], alpha=alpha, - ensemble=True, + ensemble=True, optimize_beta=True ) conformity_scores_pfit.append(mapie_enbpi.conformity_scores_) @@ -384,7 +384,7 @@ plot_forecast(y_train, y_test, y_preds, y_pis, coverages, widths, plot_coverage= ![png](output_34_0.png) - + @@ -422,9 +422,9 @@ plt.plot(y_test[window:].index, rolling_coverage_pfit, label="With update of res - + ![png](output_37_1.png) - + ### Temporal evolution of the distribution of residuals used for estimating prediction intervals @@ -447,7 +447,5 @@ plt.legend(loc=[1, 0]) - -![png](output_39_1.png) - +![png](output_39_1.png) diff --git a/notebooks/regression/tutorial_regression.md b/notebooks/regression/tutorial_regression.md index 5a45f2ec..59e15a7a 100644 --- a/notebooks/regression/tutorial_regression.md +++ b/notebooks/regression/tutorial_regression.md @@ -16,7 +16,7 @@ jupyter: # Tutorial for regression -In this tutorial, we compare the prediction intervals estimated by MAPIE on a +In this tutorial, we compare the prediction intervals estimated by MAPIE on a simple, one-dimensional, ground truth function $$ @@ -28,15 +28,15 @@ Throughout this tutorial, we will answer the following questions: - How well do the MAPIE strategies capture the aleatoric uncertainty existing in the data? - How do the prediction intervals estimated by the resampling strategies - evolve for new *out-of-distribution* data? + evolve for new *out-of-distribution* data? - How do the prediction intervals vary between regressor models? -Throughout this tutorial, we estimate the prediction intervals first using -a polynomial function, and then using a boosting model, and a simple neural network. +Throughout this tutorial, we estimate the prediction intervals first using +a polynomial function, and then using a boosting model, and a simple neural network. -**For practical problems, we advise using the faster CV+ strategies. -For conservative prediction interval estimates, you can alternatively +**For practical problems, we advise using the faster CV+ strategies. +For conservative prediction interval estimates, you can alternatively use the CV-minmax strategies.** @@ -63,7 +63,7 @@ def x_sinx(x): ```python def get_1d_data_with_constant_noise(funct, min_x, max_x, n_samples, noise): """ - Generate 1D noisy data uniformely from the given function + Generate 1D noisy data uniformely from the given function and standard deviation for the noise. """ np.random.seed(59) @@ -76,8 +76,8 @@ def get_1d_data_with_constant_noise(funct, min_x, max_x, n_samples, noise): return X_train.reshape(-1, 1), y_train, X_test.reshape(-1, 1), y_test, y_mesh ``` -We first generate noisy one-dimensional data uniformely on an interval. -Here, the noise is considered as *homoscedastic*, since it remains constant +We first generate noisy one-dimensional data uniformely on an interval. +Here, the noise is considered as *homoscedastic*, since it remains constant over $x$. ```python @@ -87,7 +87,7 @@ X_train, y_train, X_test, y_test, y_mesh = get_1d_data_with_constant_noise( ) ``` -Let's visualize our noisy function. +Let's visualize our noisy function. ```python import matplotlib.pyplot as plt @@ -97,7 +97,7 @@ _ = plt.plot(X_test, y_mesh, color="C1") ``` As mentioned previously, we fit our training data with a simple -polynomial function. 
Here, we choose a degree equal to 10 so the function +polynomial function. Here, we choose a degree equal to 10 so the function is able to perfectly fit $x \times \sin(x)$. ```python @@ -133,7 +133,7 @@ from typing import Union, Optional from typing_extensions import TypedDict from mapie.regression import MapieRegressor from mapie.quantile_regression import MapieQuantileRegressor -from mapie.subsample import Subsample +from mapie.subsample import Subsample from sklearn.model_selection import train_test_split Params = TypedDict("Params", {"method": str, "cv": Union[int, str, Subsample], "alpha": Optional[float]}) STRATEGIES = { @@ -154,24 +154,24 @@ for strategy, params in STRATEGIES.items(): mapie = MapieQuantileRegressor(polyn_model_quant, **params) mapie.fit(X_train, y_train, random_state=1) y_pred[strategy], y_pis[strategy] = mapie.predict(X_test) - else: + else: mapie = MapieRegressor(polyn_model, **params) mapie.fit(X_train, y_train) y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.05) ``` -Let’s now compare the target confidence intervals with the predicted intervals obtained +Let’s now compare the target confidence intervals with the predicted intervals obtained with the Jackknife+, Jackknife-minmax, CV+, CV-minmax, Jackknife+-after-Boostrap, and conformalized quantile regression (CQR) strategies. Note that for the Jackknife-after-Bootstrap method, we call the :class:`mapie.subsample.Subsample` object that allows us to train bootstrapped models. Note also that the CQR method is called with :class:`MapieQuantileRegressor` with a "split" strategy. ```python def plot_1d_data( X_train, - y_train, + y_train, X_test, y_test, y_sigma, - y_pred, - y_pred_low, + y_pred, + y_pred_low, y_pred_up, ax=None, title=None @@ -238,7 +238,7 @@ with homoscedastic noise, CQR would not be the preferred method. Let’s now compare the *effective* coverage, namely the fraction of test points whose true values lie within the prediction intervals, given by -the different strategies. +the different strategies. ```python import pandas as pd @@ -255,7 +255,7 @@ pd.DataFrame([ ], index=STRATEGIES, columns=["Coverage", "Width average"]).round(2) ``` -All strategies except the Naive one give effective coverage close to the expected +All strategies except the Naive one give effective coverage close to the expected 0.95 value (recall that alpha = 0.05), confirming the theoretical garantees. @@ -274,7 +274,7 @@ def x_sinx(x): ```python def get_1d_data_with_heteroscedastic_noise(funct, min_x, max_x, n_samples, noise): """ - Generate 1D noisy data uniformely from the given function + Generate 1D noisy data uniformely from the given function and standard deviation for the noise. """ np.random.seed(59) @@ -287,7 +287,7 @@ def get_1d_data_with_heteroscedastic_noise(funct, min_x, max_x, n_samples, noise return X_train.reshape(-1, 1), y_train, X_test.reshape(-1, 1), y_test, y_mesh ``` -We first generate noisy one-dimensional data uniformely on an interval. +We first generate noisy one-dimensional data uniformely on an interval. Here, the noise is considered as *heteroscedastic*, since it will increase linearly with $x$. ```python @@ -307,7 +307,7 @@ _ = plt.plot(X_test, y_mesh, color="C1") ``` As mentioned previously, we fit our training data with a simple -polynomial function. Here, we choose a degree equal to 10 so the function +polynomial function. Here, we choose a degree equal to 10 so the function is able to perfectly fit $x \times \sin(x)$. 
```python @@ -357,7 +357,7 @@ for strategy, params in STRATEGIES.items(): mapie = MapieQuantileRegressor(polyn_model_quant, **params) mapie.fit(X_train, y_train, random_state=1) y_pred[strategy], y_pis[strategy] = mapie.predict(X_test) - else: + else: mapie = MapieRegressor(polyn_model, **params) mapie.fit(X_train, y_train) y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.05) @@ -368,12 +368,12 @@ Once again, let’s compare the target confidence intervals with prediction inte ```python def plot_1d_data( X_train, - y_train, + y_train, X_test, y_test, y_sigma, - y_pred, - y_pred_low, + y_pred, + y_pred_low, y_pred_up, ax=None, title=None @@ -410,7 +410,7 @@ for strategy, coord in zip(strategies, coords): ) ``` -We can observe that all of the strategies except CQR seem to have similar constant prediction intervals. +We can observe that all of the strategies except CQR seem to have similar constant prediction intervals. On the other hand, the CQR strategy offers a solution that adapts the prediction intervals to the local noise. @@ -462,7 +462,7 @@ plt.legend(loc=[1, 0]) Let’s now conclude by summarizing the *effective* coverage, namely the fraction of test points whose true values lie within the prediction intervals, given by -the different strategies. +the different strategies. ```python import pandas as pd @@ -486,19 +486,19 @@ All the strategies have the wanted coverage, however, we notice that the CQR str Let’s now consider one-dimensional data without noise, but normally distributed. -The goal is to explore how the prediction intervals evolve for new data +The goal is to explore how the prediction intervals evolve for new data that lie outside the distribution of the training data in order to see how the strategies -can capture the *epistemic* uncertainty. +can capture the *epistemic* uncertainty. For a comparison of the epistemic and aleatoric uncertainties, please have a look at this [source](https://en.wikipedia.org/wiki/Uncertainty_quantification). -Lets" start by generating and showing the data. +Lets" start by generating and showing the data. ```python def get_1d_data_with_normal_distrib(funct, mu, sigma, n_samples, noise): """ - Generate noisy 1D data with normal distribution from given function + Generate noisy 1D data with normal distribution from given function and noise standard deviation. """ np.random.seed(59) @@ -556,7 +556,7 @@ for strategy, params in STRATEGIES.items(): mapie = MapieQuantileRegressor(polyn_model_quant, **params) mapie.fit(X_train, y_train, random_state=1) y_pred[strategy], y_pis[strategy] = mapie.predict(X_test) - else: + else: mapie = MapieRegressor(polyn_model, **params) mapie.fit(X_train, y_train) y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.05) @@ -567,29 +567,29 @@ strategies = ["jackknife_plus", "jackknife_minmax", "cv_plus", "cv_minmax", "jac n_figs = len(strategies) fig, axs = plt.subplots(3, 2, figsize=(9, 13)) coords = [axs[0, 0], axs[0, 1], axs[1, 0], axs[1, 1], axs[2, 0], axs[2, 1]] -for strategy, coord in zip(strategies, coords): +for strategy, coord in zip(strategies, coords): plot_1d_data( X_train.ravel(), - y_train.ravel(), + y_train.ravel(), X_test.ravel(), y_mesh.ravel(), - 1.96*noise, + 1.96*noise, y_pred[strategy].ravel(), y_pis[strategy][:, 0, :].ravel(), - y_pis[strategy][:, 1, :].ravel(), + y_pis[strategy][:, 1, :].ravel(), ax=coord, title=strategy ) ``` At first glance, our polynomial function does not give accurate -predictions with respect to the true function when $|x > 6|$. 
-The prediction intervals estimated with the Jackknife+ do not seem to +predictions with respect to the true function when $|x > 6|$. +The prediction intervals estimated with the Jackknife+ do not seem to increase significantly, unlike the CV+ method whose prediction intervals capture a high uncertainty when $x > 6$. -Let's now compare the prediction interval widths between all strategies. +Let's now compare the prediction interval widths between all strategies. ```python @@ -609,8 +609,8 @@ Jackknife+ remain roughly constant until $|x| \sim 5$ before increasing. The CQR strategy seems to perform well, however, on the extreme values of the data the quantile regression fails to give reliable results as it outputs -negative value for the prediction intervals. This occurs because the quantile -regressor with quantile $1 - \alpha/2$ gives higher values than the quantile +negative value for the prediction intervals. This occurs because the quantile +regressor with quantile $1 - \alpha/2$ gives higher values than the quantile regressor with quantile $\alpha/2$. Note that a warning will be issued when this occurs. @@ -632,7 +632,7 @@ conservative than the Jackknife+ strategy, and tend to result in more reliable coverages for *out-of-distribution* data. It is therefore advised to use the three former strategies for predictions with new out-of-distribution data. -Note however that there are no theoretical guarantees on the coverage level +Note however that there are no theoretical guarantees on the coverage level for out-of-distribution data. Here it's important to note that the CQR strategy should not be taken into account for width prediction, and it is abundantly clear from the negative width coverage that @@ -647,7 +647,7 @@ illustrate this by comparing the prediction intervals estimated by the CV+ metho different models: - the same polynomial function as before. - + - a XGBoost model using the Scikit-learn API. - a simple neural network, a Multilayer Perceptron with three dense layers, using the KerasRegressor wrapper. @@ -712,13 +712,13 @@ xgb_model = XGBRegressor( nthread=-1 ) mlp_model = KerasRegressor( - build_fn=mlp, - epochs=500, + build_fn=mlp, + epochs=500, verbose=0 ) ``` -Let's now use MAPIE to estimate the prediction intervals using the CV+ method +Let's now use MAPIE to estimate the prediction intervals using the CV+ method and compare their prediction interval. ```python @@ -758,7 +758,7 @@ ax.set_ylabel("Prediction Interval Width") ax.legend(model_names + ["True width"], fontsize=8); ``` -As expected with the CV+ method, the prediction intervals are a bit +As expected with the CV+ method, the prediction intervals are a bit conservative since they are slightly wider than the true intervals. -However, the CV+ method on the three models gives very promising results -since the prediction intervals closely follow the true intervals with $x$. +However, the CV+ method on the three models gives very promising results +since the prediction intervals closely follow the true intervals with $x$. 
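The regression tutorial above repeatedly summarises each strategy by two numbers, the effective coverage and the average interval width. The following is a hedged, numpy-only sketch (not part of the patch; the array names and the `"cv_plus"` key are illustrative) of how those quantities are obtained from the lower and upper bounds of an interval array shaped `(n_samples, 2, n_alpha)`:

```python
import numpy as np


def effective_coverage(y_true: np.ndarray, y_low: np.ndarray, y_up: np.ndarray) -> float:
    """Fraction of test points whose true value lies inside [y_low, y_up]."""
    return float(np.mean((y_true >= y_low) & (y_true <= y_up)))


def mean_width(y_low: np.ndarray, y_up: np.ndarray) -> float:
    """Average width of the prediction intervals."""
    return float(np.mean(y_up - y_low))


# Synthetic stand-in for y_test and one strategy's intervals; with MAPIE output
# the bounds would be y_pis["cv_plus"][:, 0, 0] and y_pis["cv_plus"][:, 1, 0].
rng = np.random.default_rng(59)
y_true = rng.normal(size=500)
y_pred = y_true + rng.normal(scale=0.3, size=500)  # imperfect point predictions
y_low, y_up = y_pred - 0.6, y_pred + 0.6           # constant-width intervals

print(
    f"coverage ~ {effective_coverage(y_true, y_low, y_up):.2f}, "
    f"width ~ {mean_width(y_low, y_up):.2f}"
)
```

With a 0.6 half-width and Gaussian noise of standard deviation 0.3, the empirical coverage of this synthetic example lands close to the 0.95 target used throughout the tutorial.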
diff --git a/requirements.dev.txt b/requirements.dev.txt index 8d93c882..cee36e37 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -4,4 +4,4 @@ jupyter==1.0.0 pandas matplotlib twine==3.7.1 -wheel==0.38.1 \ No newline at end of file +wheel==0.38.1 diff --git a/requirements.doc.txt b/requirements.doc.txt index c6e16c35..54b0f4f8 100644 --- a/requirements.doc.txt +++ b/requirements.doc.txt @@ -2,4 +2,4 @@ numpydoc==1.1.0 sphinx==4.3.2 sphinx-gallery==0.10.1 sphinx_rtd_theme==1.0.0 -typing_extensions==4.0.1 \ No newline at end of file +typing_extensions==4.0.1 diff --git a/setup.py b/setup.py index 84883651..47a49d88 100644 --- a/setup.py +++ b/setup.py @@ -4,10 +4,7 @@ DISTNAME = "MAPIE" VERSION = "0.7.0" -DESCRIPTION = ( - "A scikit-learn-compatible module " - "for estimating prediction intervals." -) +DESCRIPTION = "A scikit-learn-compatible module " "for estimating prediction intervals." with codecs.open("README.rst", encoding="utf-8-sig") as f: LONG_DESCRIPTION = f.read() LONG_DESCRIPTION_CONTENT_TYPE = "text/x-rst" @@ -16,15 +13,11 @@ PROJECT_URLS = { "Bug Tracker": "https://github.com/scikit-learn-contrib/MAPIE/issues", "Documentation": "https://mapie.readthedocs.io/en/latest/", - "Source Code": "https://github.com/scikit-learn-contrib/MAPIE" + "Source Code": "https://github.com/scikit-learn-contrib/MAPIE", } LICENSE = "new BSD" MAINTAINER = "T. Cordier, V. Blot, L. Lacombe" -MAINTAINER_EMAIL = ( - "tcordier@quantmetry.com, " - "vblot@quantmetry.com, " - "llacombe@quantmetry.com" -) +MAINTAINER_EMAIL = "tcordier@quantmetry.com, " "vblot@quantmetry.com, " "llacombe@quantmetry.com" PYTHON_REQUIRES = ">=3.7" PACKAGES = find_packages() INSTALL_REQUIRES = ["scikit-learn", "packaging"] @@ -41,7 +34,7 @@ "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10" + "Programming Language :: Python :: 3.10", ] setup( @@ -60,5 +53,5 @@ python_requires=PYTHON_REQUIRES, install_requires=INSTALL_REQUIRES, classifiers=CLASSIFIERS, - zip_safe=False # the package can run out of an .egg file + zip_safe=False, # the package can run out of an .egg file ) From 50f2755df6d9d24ff5ae090504b0d6ce9ba5fb45 Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Fri, 22 Sep 2023 08:53:21 +0000 Subject: [PATCH 03/13] UPD: add mapie in conda requirements --- environment.ci.yml | 2 ++ environment.dev.yml | 2 ++ environment.doc.yml | 2 ++ environment.notebooks.yml | 1 + 4 files changed, 7 insertions(+) diff --git a/environment.ci.yml b/environment.ci.yml index 4a0b2b98..d9e978d9 100644 --- a/environment.ci.yml +++ b/environment.ci.yml @@ -12,3 +12,5 @@ dependencies: - twine - wheel - pandas + - pip: + - -e . diff --git a/environment.dev.yml b/environment.dev.yml index 5485a0e7..ce1ca2f8 100644 --- a/environment.dev.yml +++ b/environment.dev.yml @@ -9,3 +9,5 @@ dependencies: - pandas - matplotlib - lightgbm + - pip: + - -e . diff --git a/environment.doc.yml b/environment.doc.yml index b1ea21a8..36da5bf9 100644 --- a/environment.doc.yml +++ b/environment.doc.yml @@ -11,3 +11,5 @@ dependencies: - pandas - matplotlib - lightgbm + - pip: + - -e . diff --git a/environment.notebooks.yml b/environment.notebooks.yml index 9e93eb4c..8c2ce960 100755 --- a/environment.notebooks.yml +++ b/environment.notebooks.yml @@ -14,6 +14,7 @@ dependencies: - pip=22.0.3 - pip: - scikeras==0.4.1 + - -e . 
- python=3.10 - scikit-learn=1.0.1 - seaborn=0.11.2 From ccd6c836fd37d7867c7dfdb1bcf414068b2b278b Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Fri, 22 Sep 2023 10:01:54 +0000 Subject: [PATCH 04/13] UPD: change workflow --- .github/workflows/test.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2060ce12..6ed1da9a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,6 +1,12 @@ name: Unit tests -on: [push, pull_request] +on: + push: + branches: + -dev + -main + -master + pull_request: jobs: build: @@ -23,6 +29,9 @@ jobs: - os: windows-latest python-version: "3.10" numpy-version: 1.22.3 + - os: macos-latest + python-version: "3.10" + numpy-version: 1.22.3 defaults: run: shell: bash -l {0} From 4562a30ef36ff74903561950342f358cf7756839 Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Fri, 22 Sep 2023 12:06:44 +0000 Subject: [PATCH 05/13] CLEAN: remove double quotation marks --- examples/regression/2-advanced-analysis/plot_nested-cv.py | 4 ++-- mapie/classification.py | 4 ++-- mapie/multi_label_classification.py | 4 ++-- mapie/subsample.py | 4 ++-- mapie/utils.py | 8 ++++---- setup.py | 4 ++-- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/examples/regression/2-advanced-analysis/plot_nested-cv.py b/examples/regression/2-advanced-analysis/plot_nested-cv.py index 2d32c313..decd6d30 100644 --- a/examples/regression/2-advanced-analysis/plot_nested-cv.py +++ b/examples/regression/2-advanced-analysis/plot_nested-cv.py @@ -122,13 +122,13 @@ score_nested = mean_squared_error(y_test, y_pred_nested, squared=False) # Print scores and effective coverages. -print("Scores and effective coverages for the CV+ strategy using the " "Random Forest model.") +print("Scores and effective coverages for the CV+ strategy using the Random Forest model.") print( "Score on the test set for the non-nested and nested CV approaches: ", f"{score_non_nested: .3f}, {score_nested: .3f}", ) print( - "Effective coverage on the test set for the non-nested " "and nested CV approaches: ", + "Effective coverage on the test set for the non-nested and nested CV approaches: ", f"{coverage_non_nested: .3f}, {coverage_nested: .3f}", ) diff --git a/mapie/classification.py b/mapie/classification.py index 8a6fad3b..46562b9b 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -340,7 +340,7 @@ def _check_include_last_label( """ if (not isinstance(include_last_label, bool)) and (not include_last_label == "randomized"): raise ValueError( - "Invalid include_last_label argument. " "Should be a boolean or 'randomized'." + "Invalid include_last_label argument. Should be a boolean or 'randomized'." ) else: return include_last_label @@ -428,7 +428,7 @@ def _get_last_index_included( ) else: raise ValueError( - "Invalid include_last_label argument. " "Should be a boolean or 'randomized'." + "Invalid include_last_label argument. Should be a boolean or 'randomized'." ) return y_pred_index_last[:, np.newaxis, :] diff --git a/mapie/multi_label_classification.py b/mapie/multi_label_classification.py index a6b3f5dd..aaf428b5 100644 --- a/mapie/multi_label_classification.py +++ b/mapie/multi_label_classification.py @@ -226,7 +226,7 @@ def _check_all_labelled(self, y: NDArray) -> None: """ if not (y.sum(axis=1) > 0).all(): raise ValueError( - "Invalid y. 
" "All observations should contain at " "least one label." + "Invalid y. All observations should contain at least one label." ) def _check_delta(self, delta: Optional[float]): @@ -259,7 +259,7 @@ def _check_delta(self, delta: Optional[float]): "Recall with RCPS or Precision with LTT" ) elif (delta <= 0) or (delta >= 1): - raise ValueError("Invalid delta. " "delta must be in ]0, 1[") + raise ValueError("Invalid delta. delta must be in ]0, 1[") if (self.method == "crc") and (delta is not None): warnings.warn( "WARNING: you are using crc method, hence " diff --git a/mapie/subsample.py b/mapie/subsample.py index f8e09ecf..51e02bb9 100644 --- a/mapie/subsample.py +++ b/mapie/subsample.py @@ -171,7 +171,7 @@ def split(self, X: NDArray) -> Generator[Tuple[NDArray, NDArray], None, None]: """ if (self.n_blocks is not None) + (self.length is not None) != 1: raise ValueError( - "Exactly one argument between ``length`` or " "``n_blocks`` has to be not None" + "Exactly one argument between ``length`` or ``n_blocks`` has to be not None" ) n = len(X) @@ -186,7 +186,7 @@ def split(self, X: NDArray) -> Generator[Tuple[NDArray, NDArray], None, None]: indices = np.arange(n) if (length <= 0) or (length > n): raise ValueError( - "The length of blocks is <= 0 or greater than the length" "of training set." + "The length of blocks is <= 0 or greater than the lengthof training set." ) if self.overlapping: diff --git a/mapie/utils.py b/mapie/utils.py index 1906df72..8c7a4abf 100644 --- a/mapie/utils.py +++ b/mapie/utils.py @@ -191,7 +191,7 @@ def check_cv( elif cv >= 2: return KFold(n_splits=cv, shuffle=True, random_state=random_state) else: - raise ValueError("Invalid cv argument. " "Allowed integer values are -1 or int >= 2.") + raise ValueError("Invalid cv argument. Allowed integer values are -1 or int >= 2.") elif isinstance(cv, BaseCrossValidator): return cv elif isinstance(cv, BaseShuffleSplit): @@ -246,7 +246,7 @@ def check_alpha(alpha: Optional[Union[float, Iterable[float]]] = None) -> Option else: raise ValueError("Invalid alpha. Allowed values are float or Iterable.") if len(alpha_np.shape) != 1: - raise ValueError("Invalid alpha." "Please provide a one-dimensional list of values.") + raise ValueError("Invalid alpha.Please provide a one-dimensional list of values.") if alpha_np.dtype.type not in [np.float64, np.float32]: raise ValueError("Invalid alpha. Allowed values are Iterable of floats.") if np.any(np.logical_or(alpha_np <= 0, alpha_np >= 1)): @@ -553,7 +553,7 @@ def check_conformity_score( return conformity_score else: raise ValueError( - "Invalid conformity_score argument.\n" "Must be None or a ConformityScore instance." + "Invalid conformity_score argument.\nMust be None or a ConformityScore instance." ) @@ -620,7 +620,7 @@ def check_estimator_fit_predict(estimator: Union[RegressorMixin, ClassifierMixin """ if not (hasattr(estimator, "fit") and hasattr(estimator, "predict")): raise ValueError( - "Invalid estimator. " "Please provide a regressor with fit and predict methods." + "Invalid estimator. Please provide a regressor with fit and predict methods." ) diff --git a/setup.py b/setup.py index 47a49d88..5f8dbe7c 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ DISTNAME = "MAPIE" VERSION = "0.7.0" -DESCRIPTION = "A scikit-learn-compatible module " "for estimating prediction intervals." +DESCRIPTION = "A scikit-learn-compatible module for estimating prediction intervals." 
with codecs.open("README.rst", encoding="utf-8-sig") as f: LONG_DESCRIPTION = f.read() LONG_DESCRIPTION_CONTENT_TYPE = "text/x-rst" @@ -17,7 +17,7 @@ } LICENSE = "new BSD" MAINTAINER = "T. Cordier, V. Blot, L. Lacombe" -MAINTAINER_EMAIL = "tcordier@quantmetry.com, " "vblot@quantmetry.com, " "llacombe@quantmetry.com" +MAINTAINER_EMAIL = "tcordier@quantmetry.com, vblot@quantmetry.com, llacombe@quantmetry.com" PYTHON_REQUIRES = ">=3.7" PACKAGES = find_packages() INSTALL_REQUIRES = ["scikit-learn", "packaging"] From 3f724e55ec255702fe0fadcb105cecf32fe32171 Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Fri, 22 Sep 2023 12:11:44 +0000 Subject: [PATCH 06/13] UPD: add pre-commit instructions --- CONTRIBUTING.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index ee6b723d..7eb60229 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -16,7 +16,7 @@ The typical workflow for contributing to `mapie` is: 1. Fork the `master` branch from the `GitHub repository `_. 2. Clone your fork locally. -3. Commit changes. +3. Commit changes. (Optional) Install pre-commit to run code style checks before each commit. 4. Push the changes to your fork. 5. Send a pull request from your fork back to the original `master` branch. @@ -42,7 +42,14 @@ Finally install `mapie` in development mode: .. code:: sh - pip install -e . + $ pip install -e . + +(Optional) Install pre-commit to run code style checks before each commit. + +.. code:: sh + + $ pip install pre-commit + $ pre-commit install Documenting your change From 0df4d4167303753b542decbba9bbb4a097d2ee0a Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Fri, 22 Sep 2023 12:11:56 +0000 Subject: [PATCH 07/13] UPD: add pre-commit instructions --- CONTRIBUTING.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 7eb60229..2e3702d0 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -16,7 +16,7 @@ The typical workflow for contributing to `mapie` is: 1. Fork the `master` branch from the `GitHub repository `_. 2. Clone your fork locally. -3. Commit changes. (Optional) Install pre-commit to run code style checks before each commit. +3. Commit changes. (Optional) Use pre-commit to run code style checks before each commit. 4. Push the changes to your fork. 5. Send a pull request from your fork back to the original `master` branch. From 9d03a409a666b12f3a3865fc05b981824f196be3 Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Fri, 22 Sep 2023 12:27:11 +0000 Subject: [PATCH 08/13] UPD: black instructions --- .github/PULL_REQUEST_TEMPLATE.md | 1 + .github/workflows/test.yml | 2 ++ CONTRIBUTING.rst | 10 ++++++++++ Makefile | 3 +++ 4 files changed, 16 insertions(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index b851bbc4..8c75c72a 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -26,6 +26,7 @@ Please describe the tests that you ran to verify your changes. 
Provide instructi - [ ] I have updated the [HISTORY.rst](https://github.com/simai-ml/MAPIE/blob/master/HISTORY.rst) and [AUTHORS.rst](https://github.com/simai-ml/MAPIE/blob/master/AUTHORS.rst) files - [ ] Linting passes successfully : `make lint` - [ ] Typing passes successfully : `make type-check` +- [ ] Code style check pass successfully : `make black` - [ ] Unit tests pass successfully : `make tests` - [ ] Coverage is 100% : `make coverage` - [ ] Documentation builds successfully : `make doc` diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6ed1da9a..290cfec0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -51,6 +51,8 @@ jobs: run: make lint - name: Check static typing run: make type-check + - name: Check code style check + run: make black - name: Test with pytest run: make coverage - name: Code coverage diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 2e3702d0..b08f6856 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -97,6 +97,16 @@ These tests absolutely have to pass. $ make type-check +Style code check +^^^^^^^^^^^^^^^^ + +These tests absolutely have to pass. + +.. code:: sh + + $ make black + + Unit tests ^^^^^^^^^^ diff --git a/Makefile b/Makefile index 34e1d0bd..c033e327 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,9 @@ lint: flake8 . --exclude=doc,build +black: + black -l 99 --check . + type-check: mypy mapie From 107af84a139518db1797406aa194a16d86f7ccda Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Fri, 22 Sep 2023 12:39:57 +0000 Subject: [PATCH 09/13] UPD: call black with python --- Makefile | 2 +- ...plot_tutorial_multilabel_classification.py | 1 - .../plot_kim2020_simulations.py | 1 - mapie/multi_label_classification.py | 4 +--- mapie/tests/test_metrics.py | 21 +++---------------- 5 files changed, 5 insertions(+), 24 deletions(-) diff --git a/Makefile b/Makefile index c033e327..5ceedd2c 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ lint: flake8 . --exclude=doc,build black: - black -l 99 --check . + python -m black -l 99 --check . 
type-check: mypy mapie diff --git a/examples/multilabel_classification/1-quickstart/plot_tutorial_multilabel_classification.py b/examples/multilabel_classification/1-quickstart/plot_tutorial_multilabel_classification.py index 771039f8..076da786 100644 --- a/examples/multilabel_classification/1-quickstart/plot_tutorial_multilabel_classification.py +++ b/examples/multilabel_classification/1-quickstart/plot_tutorial_multilabel_classification.py @@ -119,7 +119,6 @@ y_pss, recalls, thresholds, r_hats, r_hat_pluss = {}, {}, {}, {}, {} y_test_repeat = np.repeat(y_test[:, :, np.newaxis], len(alpha), 2) for i, (name, (method, bound)) in enumerate(method_params.items()): - mapie = MapieMultiLabelClassifier(estimator=clf, method=method, metric_control="recall") mapie.fit(X_cal, y_cal) diff --git a/examples/regression/3-scientific-articles/plot_kim2020_simulations.py b/examples/regression/3-scientific-articles/plot_kim2020_simulations.py index 81e0d220..06b6541c 100644 --- a/examples/regression/3-scientific-articles/plot_kim2020_simulations.py +++ b/examples/regression/3-scientific-articles/plot_kim2020_simulations.py @@ -474,7 +474,6 @@ def plot_results(results: pd.DataFrame, score: str) -> None: if __name__ == "__main__": - results_coverages_widths = comparison_JAB( model=Ridge2(), alpha=0.1, diff --git a/mapie/multi_label_classification.py b/mapie/multi_label_classification.py index aaf428b5..fc5992c5 100644 --- a/mapie/multi_label_classification.py +++ b/mapie/multi_label_classification.py @@ -225,9 +225,7 @@ def _check_all_labelled(self, y: NDArray) -> None: has no label. """ if not (y.sum(axis=1) > 0).all(): - raise ValueError( - "Invalid y. All observations should contain at least one label." - ) + raise ValueError("Invalid y. All observations should contain at least one label.") def _check_delta(self, delta: Optional[float]): """ diff --git a/mapie/tests/test_metrics.py b/mapie/tests/test_metrics.py index 2128cfaf..4cf7c897 100644 --- a/mapie/tests/test_metrics.py +++ b/mapie/tests/test_metrics.py @@ -197,26 +197,11 @@ def test_regression_same_length() -> None: with pytest.raises(ValueError, match=r".*y should be a 1d array*"): regression_mean_width_score(y_preds[:, :2], y_preds[:, 2]) with pytest.raises(ValueError, match=r".*shape mismatch*"): - regression_ssc( - y_toy, - intervals[ - :-1, - ], - ) + regression_ssc(y_toy, intervals[:-1]) with pytest.raises(ValueError, match=r".*shape mismatch*"): - regression_ssc_score( - y_toy, - intervals[ - :-1, - ], - ) + regression_ssc_score(y_toy, intervals[:-1]) with pytest.raises(ValueError, match=r".*shape mismatch*"): - hsic( - y_toy, - intervals[ - :-1, - ], - ) + hsic(y_toy, intervals[:-1]) def test_regression_toydata_coverage_score() -> None: From 91575a5d8cc9b7946a767026e8a7f6002f559490 Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Fri, 22 Sep 2023 12:52:18 +0000 Subject: [PATCH 10/13] FIX: reformat inline --- mapie/tests/test_metrics.py | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/mapie/tests/test_metrics.py b/mapie/tests/test_metrics.py index 4cf7c897..41b5cc01 100644 --- a/mapie/tests/test_metrics.py +++ b/mapie/tests/test_metrics.py @@ -69,20 +69,10 @@ ) y_pred_set_2alphas = np.array( [ - [ - [False, False], - [False, True], - [False, True], - [False, False], - ], + [[False, False], [False, True], [False, True], [False, False]], [[False, False], [True, True], [True, True], [True, True]], [[False, False], [True, False], [True, False], 
[False, False]], - [ - [True, False], - [True, False], - [True, True], - [True, False], - ], + [[True, False], [True, False], [True, True], [True, False]], [[False, False], [True, True], [False, True], [True, True]], ] ) @@ -105,10 +95,7 @@ "1alpha_base": Params_ssc_classif(y_pred_set=y_pred_set_2alphas[:, :, 0], num_bins=2), "1alpha_3sp": Params_ssc_classif(y_pred_set=y_pred_set_2alphas[:, :, 0], num_bins=3), "1alpha_None": Params_ssc_classif(y_pred_set=y_pred_set_2alphas[:, :, 0], num_bins=None), - "2alpha_base": Params_ssc_classif( - y_pred_set=y_pred_set_2alphas, - num_bins=2, - ), + "2alpha_base": Params_ssc_classif(y_pred_set=y_pred_set_2alphas, num_bins=2), "2alpha_3sp": Params_ssc_classif(y_pred_set=y_pred_set_2alphas, num_bins=3), "2alpha_None": Params_ssc_classif(y_pred_set=y_pred_set_2alphas, num_bins=None), } @@ -349,12 +336,7 @@ def test_top_label_same_result() -> None: scr2 = top_label_ece(y_true_, pred_max_, y_score_arg=pred_argmax_) classes = np.unique([y_true_ + 1]) - scr3 = top_label_ece( - y_true_ + 1, - pred_proba_, - classes=classes, - ) - + scr3 = top_label_ece(y_true_ + 1, pred_proba_, classes=classes) scr4 = top_label_ece( y_true_ + 1, np.max(pred_proba_, axis=1), classes[np.argmax(pred_proba_, axis=1)] ) From 07b8906109e077b1e399f2ede47907f63249898d Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Fri, 22 Sep 2023 12:54:31 +0000 Subject: [PATCH 11/13] FIX: add black in requirement --- Makefile | 2 +- environment.ci.yml | 1 + requirements.ci.txt | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5ceedd2c..c033e327 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ lint: flake8 . --exclude=doc,build black: - python -m black -l 99 --check . + black -l 99 --check . type-check: mypy mapie diff --git a/environment.ci.yml b/environment.ci.yml index d9e978d9..e37125a1 100644 --- a/environment.ci.yml +++ b/environment.ci.yml @@ -5,6 +5,7 @@ channels: dependencies: - codecov - flake8==4.0.1 + - black==22.8.0 - mypy - pytest - pytest-cov diff --git a/requirements.ci.txt b/requirements.ci.txt index 23aaac54..60741f8a 100644 --- a/requirements.ci.txt +++ b/requirements.ci.txt @@ -1,5 +1,6 @@ codecov flake8==4.0.1 +black==22.8.0 mypy pytest pytest-cov From 9406f6f613df311cb559a3174673f79bd8839f74 Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Fri, 22 Sep 2023 14:01:10 +0000 Subject: [PATCH 12/13] UPD: reduce ignore list --- .flake8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.flake8 b/.flake8 index 1446331f..ca8220ea 100644 --- a/.flake8 +++ b/.flake8 @@ -1,7 +1,7 @@ [flake8] exclude = .git, .github, __pycache__ , .vscode, build max-line-length = 99 -ignore = E302,E305,W503,E203,E731,E402,E266,E712,F401,F821 +ignore = E203, W503 indent-size = 4 per-file-ignores = */__init__.py:F401 From 31a7a00a13e71acbbaa57a75e367695e334832f1 Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Fri, 29 Sep 2023 15:07:23 +0200 Subject: [PATCH 13/13] UPD: change check order --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 8c75c72a..e51b8d73 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -24,9 +24,9 @@ Please describe the tests that you ran to verify your changes. 
Provide instructi - [ ] I have read the [contributing guidelines](https://github.com/simai-ml/MAPIE/blob/master/CONTRIBUTING.rst) - [ ] I have updated the [HISTORY.rst](https://github.com/simai-ml/MAPIE/blob/master/HISTORY.rst) and [AUTHORS.rst](https://github.com/simai-ml/MAPIE/blob/master/AUTHORS.rst) files +- [ ] Code style check pass successfully : `make black` - [ ] Linting passes successfully : `make lint` - [ ] Typing passes successfully : `make type-check` -- [ ] Code style check pass successfully : `make black` - [ ] Unit tests pass successfully : `make tests` - [ ] Coverage is 100% : `make coverage` - [ ] Documentation builds successfully : `make doc`
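One implementation detail behind the earlier "CLEAN: remove double quotation marks" commit may be worth spelling out. The snippet below is a standalone illustration (not part of any patch): adjacent Python string literals are concatenated at compile time, so collapsing them into a single literal is behaviour-preserving, provided the separating space already lives inside one of the pieces.

```python
# Adjacent string literals are joined at compile time, so both spellings
# build exactly the same object; only the source layout differs.
before = (
    "A scikit-learn-compatible module "
    "for estimating prediction intervals."
)
after = "A scikit-learn-compatible module for estimating prediction intervals."
assert before == after

# The separating space must be carried by one of the pieces: without it the
# words fuse, as in the message below, which reads "...lengthof training set.".
fused = "greater than the length" "of training set."
assert fused == "greater than the lengthof training set."
```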