diff --git a/.Rbuildignore b/.Rbuildignore index 3c35250b8..b28fc8eff 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -18,6 +18,7 @@ ^revdep$ ^CRAN-RELEASE$ ^\.covrignore$ +^\.DS_Store$ ^lastMiKTeXException$ ^bench$ ^benchmark$ @@ -37,3 +38,4 @@ ^CRAN-SUBMISSION$ ^man-images$ ^vignettes/articles$ +^_archive$ diff --git a/.Rprofile b/.Rprofile deleted file mode 100644 index 23afe7877..000000000 --- a/.Rprofile +++ /dev/null @@ -1,5 +0,0 @@ -# this sets the dev folder in the libPath -tryCatch( - devtools::dev_mode(on = TRUE), - error = function(e) invisible() -) diff --git a/.github/pull_request_template.md b/.github/PULL_REQUEST_TEMPLATE.md similarity index 58% rename from .github/pull_request_template.md rename to .github/PULL_REQUEST_TEMPLATE.md index 3688a6414..2fa240b1e 100644 --- a/.github/pull_request_template.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -8,20 +8,16 @@ Reviewer Checklist (if item does not apply, mark is as complete) -- [ ] Ensure all package dependencies are installed by running `renv::install()` -- [ ] PR branch has pulled the most recent updates from master branch. Ensure the pull request branch and your local version match and both have the latest updates from the master branch. -- [ ] If an update was made to `tbl_summary()`, was the same change implemented for `tbl_svysummary()`? -- [ ] If a new function was added, function included in `_pkgdown.yml` -- [ ] If a bug was fixed, a unit test was added for the bug check +- [ ] Ensure all package dependencies are installed: `renv::install()` +- [ ] PR branch has pulled the most recent updates from master branch: `usethis::pr_merge_main()` +- [ ] If a bug was fixed, a unit test was added. - [ ] Run `pkgdown::build_site()`. Check the R console for errors, and review the rendered website. -- [ ] Code coverage is suitable for any new functions/features. Review coverage with `withr::with_envvar(new = c("NOT_CRAN" = "true"), covr::report())`. Begin in a fresh R session without any packages loaded. -- [ ] R CMD Check runs without errors, warnings, and notes +- [ ] Code coverage is suitable for any new functions/features: `devtools::test_coverage()` - [ ] `usethis::use_spell_check()` runs with no spelling errors in documentation When the branch is ready to be merged into master: - [ ] Update `NEWS.md` with the changes from this pull request under the heading "`# gtsummary (development version)`". If there is an issue associated with the pull request, reference it in parentheses at the end update (see `NEWS.md` for examples). - [ ] Increment the version number using `usethis::use_version(which = "dev")` -- [ ] Run `codemetar::write_codemeta()` - [ ] Run `usethis::use_spell_check()` again - [ ] Approve Pull Request - [ ] Merge the PR. Please use "Squash and merge". diff --git a/.github/workflows/R-CMD-check-as-cran.yaml b/.github/workflows/R-CMD-check-as-cran.yaml new file mode 100644 index 000000000..525728535 --- /dev/null +++ b/.github/workflows/R-CMD-check-as-cran.yaml @@ -0,0 +1,49 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help + +on: + push: + branches: [main, master] + pull_request: + # branches: [main, master] + +name: R-CMD-check-as-cran + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true diff --git a/.github/workflows/R-CMD-check-historic-R-versions.yaml b/.github/workflows/R-CMD-check-historic-R-versions.yaml deleted file mode 100644 index 71922c60b..000000000 --- a/.github/workflows/R-CMD-check-historic-R-versions.yaml +++ /dev/null @@ -1,94 +0,0 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. -# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions -on: - push: - branches: - - main - - master - pull_request: - branches: - - main - - master - -name: R-CMD-historic-R-check - -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: ubuntu-latest, r: 'oldrel-2'} - - {os: ubuntu-latest, r: 'oldrel-3'} - # - {os: ubuntu-latest, r: 'oldrel-4'} - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-pandoc@v2 - - # installing nlopt for R versions less than 3.6 - # this is for the installation of nloptr package required for lme4 - - name: Install nlopt on Linux - if: runner.os == 'Linux' - run: sudo apt-get install pkg-config libnlopt-dev - - - name: Query dependencies - run: | - install.packages('remotes', repos = 'http://cran.rstudio.com') - saveRDS(remotes::dev_package_deps(dependencies = TRUE, repos = 'http://cran.rstudio.com'), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Cache R packages - if: runner.os != 'Windows' - uses: actions/cache@v2 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-2-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-2- - - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - sudo apt-get update -y - sudo apt-get install -y texlive-fonts-extra - sudo apt-get install -y libcurl4-openssl-dev - - - name: Install dependencies - run: | - remotes::install_cran("curl", repos = 'https://packagemanager.posit.co/cran/latest') # added this because curl install was failing from CRAN, hopeing this uses the binaries available from RSPM - remotes::install_deps(dependencies = TRUE, repos = 'http://cran.rstudio.com') - remotes::install_cran("rcmdcheck", repos = 'http://cran.rstudio.com') - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - _R_CHECK_FORCE_SUGGESTS_: false - run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main - with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 5c626acd2..c1261daf5 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,4 +1,4 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/master/examples +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help # # NOTE: This workflow is overkill for most R packages and @@ -8,7 +8,7 @@ on: push: branches: [main, master] pull_request: - branches: [main, master] + # branches: [main, master] name: R-CMD-check @@ -22,12 +22,13 @@ jobs: fail-fast: false matrix: config: - - {os: macOS-latest, r: 'release'} + - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} + # use 4.1 to check with rtools40's older compiler + - {os: windows-latest, r: '4.1'} - # Use older ubuntu to maximise backward compatibility - # - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} - {os: ubuntu-latest, r: 'oldrel-1'} # - {os: ubuntu-latest, r: 'oldrel-2'} @@ -39,7 +40,7 @@ jobs: R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: r-lib/actions/setup-pandoc@v2 @@ -51,24 +52,18 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: rcmdcheck - - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - sudo apt-get update -y - sudo apt-get install -y texlive-fonts-extra + extra-packages: any::rcmdcheck + needs: check - - uses: r-lib/actions/check-r-package@v2 + # this is hopefully a transitory issue and we can delete in the future (2024-05-02) + # https://github.com/lme4/lme4/issues/763 + - name: Install Matrix and lme4 from CRAN + run: utils::install.packages(c("Matrix", "lme4"), repos = c(CRAN = "https://cloud.r-project.org")) + shell: Rscript {0} - - name: Show testthat output - if: always() - run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true - shell: bash - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main + # not using the default '--as-cran' arg, because it sets up private libs (see https://github.com/r-lib/devtools/issues/2044#issuecomment-526209877) + - uses: r-lib/actions/check-r-package@v2 with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check + upload-snapshots: true + args: 'c("--no-manual")' diff --git a/.github/workflows/close.yaml b/.github/workflows/close.yaml deleted file mode 100644 index 0ab93f0d9..000000000 --- a/.github/workflows/close.yaml +++ /dev/null @@ -1,20 +0,0 @@ -name: 'Close stale issues and PRs' -on: - schedule: - - cron: '0 0 * * *' - -jobs: - stale: - runs-on: ubuntu-latest - steps: - - uses: actions/stale@v3 - with: - days-before-stale: 365 - stale-issue-label: 'stale issue' - stale-pr-label: 'stale pull request' - stale-issue-message: 'This issue has been automatically closed due to inactivity.' - stale-pr-message: 'This pull request has been automatically closed due to inactivity.' - days-before-close: 14 - exempt-assignees: 'ddsjoberg,larmarange,emilyvertosick,jalavery,karissawhiting,larmarange,michaelcurry1123,shannonpileggi,zabore' - exempt-issue-labels: 'waiting,hackathon,help wanted' - exempt-all-milestones: true diff --git a/.github/workflows/continuous-benchmarks.txt b/.github/workflows/continuous-benchmarks.txt deleted file mode 100644 index 7aa191979..000000000 --- a/.github/workflows/continuous-benchmarks.txt +++ /dev/null @@ -1,52 +0,0 @@ -# on: -# push: -# pull_request: -# -# name: Benchmark -# -# jobs: -# build: -# runs-on: macOS-latest -# steps: -# - name: Checkout repo -# uses: actions/checkout@master -# -# - name: Setup R -# uses: r-lib/actions/setup-r@master -# -# - name: Query dependencies -# run: | -# install.packages('remotes') -# saveRDS(remotes::dev_package_deps(dependencies = c("Depends", "Imports")), ".github/bench_depends.Rds", version = 2) -# writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") -# shell: Rscript {0} -# -# - name: Cache R packages -# uses: actions/cache@v1 -# with: -# path: ${{ env.R_LIBS_USER }} -# key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/bench_depends.Rds') }} -# restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- -# -# - name: Install dependencies -# run: | -# install.packages(c('devtools', 'microbenchmark', 'here', 'gert', 'usethis', 'lubridate', 'readr', 'ggplot2', 'rmarkdown', 'knitr')) -# remotes::install_deps(dependencies = c("Depends", "Imports")) # don't need suggests to run most fns -# shell: Rscript {0} -# -# - name: Install Pandoc -# uses: r-lib/actions/setup-pandoc@9598b8eeb6d88de7d76d580d84443542bbfdffce -# with: -# pandoc-version: 2.11.1.1 -# -# - name: Build RMD REPORT -# run: | -# Rscript -e 'rmarkdown::render(input = here::here("benchmark/README.Rmd"), output_file = here::here("benchmark/README.md"), clean = TRUE, output_format = "md_document")' -# -# - name: Commit files -# run: | -# git config --global user.email "ghau@example.com" -# git config --global user.name "GitHub Actions User" -# git add --all -# git commit -am "continous benchmark autocommit" -# git push diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 0b2602168..3dabb62c5 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -4,7 +4,7 @@ on: push: branches: [main, master] pull_request: - branches: [main, master] + # branches: [main, master] release: types: [published] workflow_dispatch: @@ -19,8 +19,10 @@ jobs: group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: r-lib/actions/setup-pandoc@v2 @@ -31,7 +33,7 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: extra-packages: any::pkgdown, local::. - needs: website + needs: check - name: Build site run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) @@ -39,7 +41,7 @@ jobs: - name: Deploy to GitHub pages 🚀 if: github.event_name != 'pull_request' - uses: JamesIves/github-pages-deploy-action@4.1.4 + uses: JamesIves/github-pages-deploy-action@v4.4.1 with: clean: false branch: gh-pages diff --git a/.github/workflows/pr-commands.yaml b/.github/workflows/pr-commands.yaml index 98ca228bc..71f335b3e 100644 --- a/.github/workflows/pr-commands.yaml +++ b/.github/workflows/pr-commands.yaml @@ -1,4 +1,4 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/master/examples +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: issue_comment: @@ -8,13 +8,13 @@ name: Commands jobs: document: - if: startsWith(github.event.comment.body, '/document') + if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }} name: document runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: r-lib/actions/pr-fetch@v2 with: @@ -26,10 +26,12 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: roxygen2 + extra-packages: any::roxygen2 + needs: pr-document - name: Document - run: Rscript -e 'roxygen2::roxygenise()' + run: roxygen2::roxygenise() + shell: Rscript {0} - name: commit run: | @@ -43,13 +45,13 @@ jobs: repo-token: ${{ secrets.GITHUB_TOKEN }} style: - if: startsWith(github.event.comment.body, '/style') + if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }} name: style runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: r-lib/actions/pr-fetch@v2 with: @@ -58,10 +60,12 @@ jobs: - uses: r-lib/actions/setup-r@v2 - name: Install dependencies - run: Rscript -e 'install.packages("styler")' + run: install.packages("styler") + shell: Rscript {0} - name: Style - run: Rscript -e 'styler::style_pkg()' + run: styler::style_pkg() + shell: Rscript {0} - name: commit run: | diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 4b01af758..8bee3758d 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -1,5 +1,5 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/master/examples -# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +# # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: branches: [main, master] @@ -15,7 +15,7 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: r-lib/actions/setup-r@v2 with: @@ -23,8 +23,28 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: covr + extra-packages: any::covr + needs: check - name: Test coverage - run: covr::codecov() + run: | + covr::codecov( + quiet = FALSE, + clean = FALSE, + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") + ) shell: Rscript {0} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v3 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/DESCRIPTION b/DESCRIPTION index 8515c29a4..da42bb941 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,67 +1,38 @@ Package: gtsummary -Title: Presentation-Ready Data Summary and Analytic Result - Tables -Version: 1.7.2.9001 -Authors@R: - c(person(given = "Daniel D.", - family = "Sjoberg", - role = c("aut", "cre"), - email = "danield.sjoberg@gmail.com", - comment = c(ORCID = "0000-0003-0862-2018")), - person(given = "Joseph", - family = "Larmarange", - role = "aut", - comment = c(ORCID = "0000-0001-7097-700X")), - person(given = "Michael", - family = "Curry", - role = "aut", - comment = c(ORCID = "0000-0002-0261-4044")), - person(given = "Jessica", - family = "Lavery", - role = "aut", - comment = c(ORCID = "0000-0002-2746-5647")), - person(given = "Karissa", - family = "Whiting", - role = "aut", - comment = c(ORCID = "0000-0002-4683-1868")), - person(given = "Emily C.", - family = "Zabor", - role = "aut", - comment = c(ORCID = "0000-0002-1402-4498")), - person(given = "Xing", - family = "Bai", - role = "ctb"), - person(given = "Esther", - family = "Drill", - role = "ctb", - comment = c(ORCID = "0000-0002-3315-4538")), - person(given = "Jessica", - family = "Flynn", - role = "ctb", - comment = c(ORCID = "0000-0001-8310-6684")), - person(given = "Margie", - family = "Hannum", - role = "ctb", - comment = c(ORCID = "0000-0002-2953-0449")), - person(given = "Stephanie", - family = "Lobaugh", - role = "ctb"), - person(given = "Shannon", - family = "Pileggi", - role = "ctb", - comment = c(ORCID = "0000-0002-7732-4164")), - person(given = "Amy", - family = "Tin", - role = "ctb", - comment = c(ORCID = "0000-0002-8005-0694")), - person(given = "Gustavo", - family = "Zapata Wainberg", - role = "ctb", - comment = c(ORCID = "0000-0002-2524-3637"))) -Description: Creates presentation-ready tables summarizing data - sets, regression models, and more. The code to create the tables is - concise and highly customizable. Data frames can be summarized with - any function, e.g. mean(), median(), even user-written functions. +Title: Presentation-Ready Data Summary and Analytic Result Tables +Version: 1.9.9.9000 +Authors@R: c( + person("Daniel D.", "Sjoberg", , "danield.sjoberg@gmail.com", role = c("aut", "cre"), + comment = c(ORCID = "0000-0003-0862-2018")), + person("Joseph", "Larmarange", role = "aut", + comment = c(ORCID = "0000-0001-7097-700X")), + person("Michael", "Curry", role = "aut", + comment = c(ORCID = "0000-0002-0261-4044")), + person("Jessica", "Lavery", role = "aut", + comment = c(ORCID = "0000-0002-2746-5647")), + person("Karissa", "Whiting", role = "aut", + comment = c(ORCID = "0000-0002-4683-1868")), + person("Emily C.", "Zabor", role = "aut", + comment = c(ORCID = "0000-0002-1402-4498")), + person("Xing", "Bai", role = "ctb"), + person("Esther", "Drill", role = "ctb", + comment = c(ORCID = "0000-0002-3315-4538")), + person("Jessica", "Flynn", role = "ctb", + comment = c(ORCID = "0000-0001-8310-6684")), + person("Margie", "Hannum", role = "ctb", + comment = c(ORCID = "0000-0002-2953-0449")), + person("Stephanie", "Lobaugh", role = "ctb"), + person("Shannon", "Pileggi", role = "ctb", + comment = c(ORCID = "0000-0002-7732-4164")), + person("Amy", "Tin", role = "ctb", + comment = c(ORCID = "0000-0002-8005-0694")), + person("Gustavo", "Zapata Wainberg", role = "ctb", + comment = c(ORCID = "0000-0002-2524-3637")) + ) +Description: Creates presentation-ready tables summarizing data sets, + regression models, and more. The code to create the tables is concise + and highly customizable. Data frames can be summarized with any + function, e.g. mean(), median(), even user-written functions. Regression models are summarized and include the reference rows for categorical variables. Common regression models, such as logistic regression and Cox proportional hazards regression, are automatically @@ -72,66 +43,44 @@ URL: https://github.com/ddsjoberg/gtsummary, https://www.danieldsjoberg.com/gtsummary/ BugReports: https://github.com/ddsjoberg/gtsummary/issues Depends: - R (>= 3.4) + R (>= 4.1) Imports: - broom (>= 1.0.1), - broom.helpers (>= 1.14.0), - cli (>= 3.1.1), - dplyr (>= 1.1.1), - forcats (>= 1.0.0), + cards (>= 0.1.0.9031), + cli (>= 3.6.1), + dplyr (>= 1.1.3), glue (>= 1.6.2), gt (>= 0.10.0), - knitr (>= 1.37), - lifecycle (>= 1.0.1), - purrr (>= 1.0.1), - rlang (>= 1.0.3), - stringr (>= 1.4.0), + lifecycle (>= 1.0.3), + rlang (>= 1.1.1), tibble (>= 3.2.1), - tidyr (>= 1.1.4), - vctrs (>= 0.5.2) + tidyr (>= 1.3.0), + vctrs Suggests: - aod (>= 1.3.1), - broom.mixed (>= 0.2.9), - car (>= 3.0-11), - cmprsk, - covr, - effectsize (>= 0.6.0), - emmeans (>= 1.7.3), - flextable (>= 0.8.1), - geepack, - ggstats (>= 0.2.1), - Hmisc, - huxtable (>= 5.4.0), - insight (>= 0.15.0), - kableExtra (>= 1.3.4), + broom (>= 1.0.5), + broom.helpers (>= 1.15.0), + broom.mixed, + cardx (>= 0.1.0.9039), + effectsize, + emmeans, + htmltools, + knitr, lme4, - mgcv, - mice (>= 3.10.0), - nnet, - officer, - openxlsx, - parameters (>= 0.20.2), - parsnip (>= 0.1.7), - rmarkdown, - sandwich (>= 3.0.1), - scales, - smd (>= 0.6.6), - spelling (>= 2.2), - survey, - survival (>= 3.2-11), - testthat (>= 3.0.4), - tidycmprsk (>= 0.1.2), - workflows (>= 0.2.4) + smd, + testthat (>= 3.2.0), + withr VignetteBuilder: knitr RdMacros: lifecycle +Remotes: + github::insightsengineering/cards, + github::insightsengineering/cardx +Config/Needs/check: broom, broom.helpers, broom.mixed, lme4, effectsize, + emmeans, smd +Config/testthat/edition: 3 +Config/testthat/parallel: true Encoding: UTF-8 Language: en-US LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 -Config/testthat/edition: 3 -Config/testthat/parallel: true -Config/Needs/website: - lubridate +RoxygenNote: 7.3.1 diff --git a/LICENSE b/LICENSE index ad805f491..f9bb6ab1f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,2 @@ -YEAR: 2020 -COPYRIGHT HOLDER: Daniel D. Sjoberg +YEAR: 2023 +COPYRIGHT HOLDER: gts2 authors diff --git a/LICENSE.md b/LICENSE.md index f61a44ea4..13f4bd185 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ # MIT License -Copyright (c) 2020 Daniel D. Sjoberg +Copyright (c) 2023 gts2 authors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/NAMESPACE b/NAMESPACE index 3185d1bb6..dca5f7adc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,129 +1,53 @@ # Generated by roxygen2: do not edit by hand -S3method(add_ci,tbl_summary) -S3method(add_ci,tbl_svysummary) -S3method(add_global_p,tbl_regression) -S3method(add_global_p,tbl_uvregression) +S3method(add_difference,tbl_summary) S3method(add_n,tbl_regression) S3method(add_n,tbl_summary) -S3method(add_n,tbl_survfit) -S3method(add_n,tbl_svysummary) -S3method(add_n,tbl_uvregression) S3method(add_nevent,tbl_regression) -S3method(add_nevent,tbl_survfit) -S3method(add_nevent,tbl_uvregression) -S3method(add_overall,tbl_continuous) -S3method(add_overall,tbl_custom_summary) S3method(add_overall,tbl_summary) -S3method(add_overall,tbl_svysummary) -S3method(add_p,tbl_continuous) -S3method(add_p,tbl_cross) S3method(add_p,tbl_summary) -S3method(add_p,tbl_survfit) -S3method(add_p,tbl_svysummary) +S3method(add_stat_label,tbl_summary) S3method(as.data.frame,gtsummary) S3method(as_tibble,gtsummary) +S3method(assign_tests,tbl_summary) S3method(bold_labels,gtsummary) -S3method(bold_labels,tbl_cross) S3method(bold_levels,gtsummary) -S3method(bold_levels,tbl_cross) -S3method(inline_text,gtsummary) -S3method(inline_text,tbl_cross) -S3method(inline_text,tbl_regression) -S3method(inline_text,tbl_summary) -S3method(inline_text,tbl_survfit) -S3method(inline_text,tbl_survival) -S3method(inline_text,tbl_svysummary) -S3method(inline_text,tbl_uvregression) S3method(italicize_labels,gtsummary) -S3method(italicize_labels,tbl_cross) S3method(italicize_levels,gtsummary) -S3method(italicize_levels,tbl_cross) -S3method(knit_print,gtsummary) -S3method(plot,tbl_regression) -S3method(plot,tbl_uvregression) S3method(print,gtsummary) -S3method(print,tbl_split) -S3method(tbl_regression,brmsfit) -S3method(tbl_regression,crr) S3method(tbl_regression,default) -S3method(tbl_regression,gam) -S3method(tbl_regression,glmerMod) -S3method(tbl_regression,glmmTMB) -S3method(tbl_regression,glmmadmb) -S3method(tbl_regression,lmerMod) -S3method(tbl_regression,mipo) -S3method(tbl_regression,mira) -S3method(tbl_regression,model_fit) -S3method(tbl_regression,multinom) -S3method(tbl_regression,stanreg) -S3method(tbl_regression,survreg) -S3method(tbl_regression,tidycrr) -S3method(tbl_regression,workflow) -S3method(tbl_split,gtsummary) -S3method(tbl_survfit,data.frame) -S3method(tbl_survfit,list) -S3method(tbl_survfit,survfit) export("%>%") -export(.create_gtsummary_object) -export(.escape_html) -export(.escape_latex) -export(.escape_latex2) export(.table_styling_expr_to_row_number) -export(add_ci) export(add_difference) -export(add_glance_source_note) -export(add_glance_table) -export(add_global_p) export(add_n) export(add_nevent) export(add_overall) export(add_p) export(add_q) -export(add_significance_stars) export(add_stat) export(add_stat_label) -export(add_vif) export(all_categorical) -export(all_character) export(all_continuous) export(all_continuous2) -export(all_contrasts) export(all_dichotomous) -export(all_double) -export(all_factor) -export(all_integer) -export(all_interaction) -export(all_intercepts) -export(all_logical) -export(all_numeric) export(all_of) export(all_stat_cols) export(all_tests) export(any_of) -export(as_flex_table) -export(as_flextable) export(as_gt) -export(as_hux_table) -export(as_hux_xlsx) -export(as_kable) -export(as_kable_extra) export(as_tibble) +export(assign_summary_digits) +export(assign_summary_type) +export(assign_tests) export(bold_labels) export(bold_levels) -export(bold_p) -export(check_gtsummary_theme) -export(combine_terms) +export(brdg_summary) +export(card_summary) export(contains) -export(continuous_summary) export(ends_with) export(everything) -export(filter_p) -export(get_gtsummary_theme) -export(inline_text) export(italicize_labels) export(italicize_levels) -export(knit_print) export(last_col) export(matches) export(modify_caption) @@ -138,181 +62,47 @@ export(modify_footnote) export(modify_header) export(modify_spanning_header) export(modify_table_body) -export(modify_table_header) export(modify_table_styling) export(mutate) export(num_range) export(one_of) -export(pool_and_tidy_mice) -export(proportion_summary) -export(ratio_summary) -export(remove_row_type) -export(reset_gtsummary_theme) +export(pier_summary_categorical) +export(pier_summary_continuous) +export(pier_summary_continuous2) +export(pier_summary_dichotomous) +export(pier_summary_missing_row) export(select) -export(separate_p_footnotes) -export(set_gtsummary_theme) -export(show_header_names) -export(sort_p) export(starts_with) +export(styfn_number) +export(styfn_percent) +export(styfn_pvalue) +export(styfn_ratio) +export(styfn_sigfig) export(style_number) export(style_percent) export(style_pvalue) export(style_ratio) export(style_sigfig) -export(tbl_butcher) -export(tbl_continuous) -export(tbl_cross) -export(tbl_custom_summary) -export(tbl_merge) export(tbl_regression) -export(tbl_split) -export(tbl_stack) -export(tbl_strata) -export(tbl_strata2) export(tbl_summary) -export(tbl_survfit) -export(tbl_survival) -export(tbl_svysummary) -export(tbl_uvregression) -export(theme_gtsummary_compact) -export(theme_gtsummary_continuous2) -export(theme_gtsummary_eda) -export(theme_gtsummary_journal) -export(theme_gtsummary_language) -export(theme_gtsummary_mean_sd) -export(theme_gtsummary_printer) -export(tidy_bootstrap) -export(tidy_gam) -export(tidy_robust) -export(tidy_standardize) -export(tidy_wald_test) export(vars) -export(with_gtsummary_theme) -importFrom(broom.helpers,.formula_list_to_named_list) -importFrom(broom.helpers,.generic_selector) -importFrom(broom.helpers,.select_to_varnames) -importFrom(cli,cli_alert_danger) -importFrom(cli,cli_alert_info) -importFrom(cli,cli_code) -importFrom(cli,cli_ul) +export(where) +import(rlang) importFrom(dplyr,"%>%") +importFrom(dplyr,across) importFrom(dplyr,all_of) importFrom(dplyr,any_of) -importFrom(dplyr,arrange) -importFrom(dplyr,bind_cols) -importFrom(dplyr,bind_rows) -importFrom(dplyr,case_when) -importFrom(dplyr,coalesce) +importFrom(dplyr,as_tibble) importFrom(dplyr,contains) -importFrom(dplyr,count) -importFrom(dplyr,desc) -importFrom(dplyr,distinct) importFrom(dplyr,ends_with) importFrom(dplyr,everything) -importFrom(dplyr,filter) -importFrom(dplyr,full_join) -importFrom(dplyr,group_by) -importFrom(dplyr,if_else) -importFrom(dplyr,inner_join) importFrom(dplyr,last_col) -importFrom(dplyr,left_join) importFrom(dplyr,matches) importFrom(dplyr,mutate) -importFrom(dplyr,mutate_all) -importFrom(dplyr,mutate_at) -importFrom(dplyr,n) importFrom(dplyr,num_range) importFrom(dplyr,one_of) -importFrom(dplyr,pull) -importFrom(dplyr,rename) -importFrom(dplyr,rename_at) -importFrom(dplyr,row_number) -importFrom(dplyr,rowwise) importFrom(dplyr,select) -importFrom(dplyr,slice) importFrom(dplyr,starts_with) -importFrom(dplyr,ungroup) importFrom(dplyr,vars) -importFrom(glue,as_glue) +importFrom(dplyr,where) importFrom(glue,glue) -importFrom(glue,glue_collapse) -importFrom(gt,html) -importFrom(gt,md) -importFrom(knitr,knit_print) -importFrom(lifecycle,deprecate_soft) -importFrom(purrr,chuck) -importFrom(purrr,compact) -importFrom(purrr,cross_df) -importFrom(purrr,discard) -importFrom(purrr,every) -importFrom(purrr,flatten) -importFrom(purrr,imap) -importFrom(purrr,imap_dfr) -importFrom(purrr,imap_lgl) -importFrom(purrr,keep) -importFrom(purrr,map) -importFrom(purrr,map2) -importFrom(purrr,map2_chr) -importFrom(purrr,map_chr) -importFrom(purrr,map_dbl) -importFrom(purrr,map_dfr) -importFrom(purrr,map_if) -importFrom(purrr,map_lgl) -importFrom(purrr,negate) -importFrom(purrr,partial) -importFrom(purrr,pluck) -importFrom(purrr,pmap) -importFrom(purrr,pmap_chr) -importFrom(purrr,pmap_dbl) -importFrom(purrr,pmap_lgl) -importFrom(purrr,reduce) -importFrom(purrr,some) -importFrom(rlang,"%||%") -importFrom(rlang,":=") -importFrom(rlang,.data) -importFrom(rlang,.env) -importFrom(rlang,abort) -importFrom(rlang,call2) -importFrom(rlang,check_dots_empty) -importFrom(rlang,enexpr) -importFrom(rlang,enquo) -importFrom(rlang,eval_tidy) -importFrom(rlang,expr) -importFrom(rlang,exprs) -importFrom(rlang,inform) -importFrom(rlang,inject) -importFrom(rlang,is_character) -importFrom(rlang,is_empty) -importFrom(rlang,is_function) -importFrom(rlang,is_list) -importFrom(rlang,is_named) -importFrom(rlang,is_string) -importFrom(rlang,parse_expr) -importFrom(rlang,quo_is_null) -importFrom(rlang,quo_text) -importFrom(rlang,set_names) -importFrom(rlang,sym) -importFrom(rlang,syms) -importFrom(stats,as.formula) -importFrom(stats,weights) -importFrom(stringr,fixed) -importFrom(stringr,str_detect) -importFrom(stringr,str_extract_all) -importFrom(stringr,str_locate) -importFrom(stringr,str_remove) -importFrom(stringr,str_remove_all) -importFrom(stringr,str_replace_all) -importFrom(stringr,str_split) -importFrom(stringr,str_starts) -importFrom(stringr,str_sub) -importFrom(stringr,str_wrap) -importFrom(stringr,word) -importFrom(tibble,as_tibble) -importFrom(tibble,deframe) -importFrom(tibble,enframe) -importFrom(tibble,tibble) -importFrom(tibble,tribble) -importFrom(tidyr,complete) -importFrom(tidyr,nest) -importFrom(tidyr,spread) -importFrom(tidyr,unnest) diff --git a/NEWS.md b/NEWS.md index 81ccd0c0f..9ed25993d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,70 @@ # gtsummary (development version) -* Fix in `add_difference()` for pair t-tests. Previously, the sign of the reported difference depended on which group appeared first in the source data. Function has been updated to consistently report the difference as the first group mean minus the second group mean. (#1557) +### Overview of Changes + +#### User-facing Updates + +* The `tbl_regression.tidycrr()` S3 method has been removed and migrated to the {tidycmprsk} package. + +* The `add_q(quiet)` argument has been deprecated. + +* After `tbl_regression()`, the `.$model_obj` is no longer returned with the object. This is (and always has been) available in `.$inputs$x`. + +* Argument `add_p.tbl_summary(adj.vars)` was added to more easily add p-values that are adjusted/stratified by other columns in a data frame. + +* The counts in the header of `tbl_summary(by)` tables now appear on a new line. + +* If a column is all `NA_character_` in `tbl_summary()`, the default summary type is now `"continuous"`, where previously it was `"dichotomous"`. + +* Added a family of function `styfn_*()` that are similar to the `style_*()` except they return a styling _function_, rather than a styled value. + +* Previously, in a `tbl_summary()` variables that were `c(0, 1)`, `c("no", "yes")`, `c("No", "Yes")`, and `c("NO", "YES")` would default to a dichotomous summary with the `1` and `yes` level being shown in the table. This would occur even in the case when, for example, only `0` was observed. In this release, the line shown for dichotomous variables must be observed OR the unobserved level must be either explicitly defined in a factor or be a logical vector. This means that a character vector of all `"yes"` or all `"no"` values will default to a categorical summary instead of dichotomous. + +* Previously, indentation was handled with `modify_table_styling(text_format = c("indent", "indent2"))`, which would indent a cell 4 and 8 spaces, respectively. Handling of indentation has been migrated to `modify_table_styling(indent = integer())`, and by default, the label column is indented to zero spaces. This makes it easier to indent a group of rows. + +* The inputs for `modify_table_styling(undo_text_format)` has been updated to mirror its counterpart `modify_table_styling(text_format)` and no longer accepts `TRUE` or `FALSE`. + +* In `tbl_summary()`, the default calculation for quantiles (e.g. statistics of the form `"p25"` or `"p75"`) has been updated with type `quantile(type=2)`. + +* In `tbl_summary()`, dates and times showed the minimum and maximum values only by default. They are now treated as all other continuous summaries and share their default statistics of the median and IQR. + +* The values passed in `tbl_summary(value)` are now only checked for columns that are summary type `"dichotomous"`. + +* Previously, the gtsummary selecting functions, e.g. `all_categorical()`, `all_continuous()`, etc., would error if used out of context. They will now select no columns when used out-of-context. + +* Added the following methods for calculating differences in `add_difference.tbl_summary()`: Hedge's G, Paired data Cohen's D, and Paired data Hedge's G. All three are powered by the {effectsize} package. + +#### Internal Updates + +* Greater consistency has been put in place for all calculated statistics in gtsummary. Previously, each function handled its own calculations and transforming these statistics into data frames that would be printed. Now each function will first prepare an Analysis Result Dataset (ARD), and ARDs are converted to gtsummary structures using bridge functions (prefixed with `brdg_*()`). The bridge functions will be exported to allow anyone to more easily extend gtsummary functions. + +### Bug Fixes + +* Fix in `add_difference()` for paired t-tests. Previously, the sign of the reported difference depended on which group appeared first in the source data. Function has been updated to consistently report the difference as the first group mean minus the second group mean. (#1557) + +### Deprecations + +* Arguments `modify_header(quiet)`, `modify_footnote(quiet)`, and `modify_spanning_header(quiet)` have been deprecated. Verbose messaging is no longer available. + +* Arguments `modify_header(update)`, `modify_footnote(update)`, `modify_spanning_header(update)`, and `modify_fmt_fun()` have been deprecated. Use dynamic dots instead, e.g. `modify_header(...)` + +* Arguments `add_stat(fmt_fun, header, footnote, new_col_name)` have been deprecated since v1.4.0 (2021-04-13). They have now been fully removed from the package. + +* Global options have been deprecated in gtsummary since v1.3.1 (2020-06-02). They have now been fully removed from the package. + +* The `modify_header(stat_by)` argument was deprecated in v1.3.6 (2021-01-08), and has now been fully removed from the package. + +* Use of the `vars()` selector was first removed in v1.2.5 (2020-02-11), and the messaging about the deprecation was kicked up in June 2022. This use is now defunct and the function will soon no longer be exported. + +* The `as_flextable()` function was deprecated in v1.3.3 (2020-08-11), and has now been fully removed from the package. + +* Custom selectors `all_numeric()`, `all_character()`, `all_integer()`, `all_double()`, `all_logical()`, `all_factor()` functions were deprecated in v1.3.6 (2021-01-08), and has now been fully removed from the package. These functions were added before the `tidyselect::where()` function was released, which is a replacement for all these functions. + +* The `modify_cols_merge()` functions was renamed to `modify_column_merge()` to match the other function names in v1.6.1 (2022-06-22). The deprecation has been upgraded from a warning to an error. + +* The `add_p(test = ~'aov')` test is now deprecated as identical results can be obtained with `add_p(test = ~'oneway.test', test.args = ~list(var.equal = TRUE))`. + +* Previously, `add_p.tbl_summary()` would coerce various data types to classes compatible with some base R tests. For example, we would convert `difftime` classes to general numeric before passing to `wilcox.test()`. We have eliminated type- and class-specific handling in these functions and it is now left to the the user pass data compatible with the functions that calculate the p-values or to create a custom test that wraps `wilcox.test()` and performs the conversion. This change is effective immediately. # gtsummary 1.7.2 diff --git a/R/add_difference.R b/R/add_difference.R index 1399ea426..92ac33779 100644 --- a/R/add_difference.R +++ b/R/add_difference.R @@ -1,29 +1,52 @@ -#' Add difference between groups +#' Add differences #' -#' Add the difference between two groups (typically mean difference), -#' along with the difference confidence interval and p-value. +#' - [`add_difference.tbl_summary()`] #' -#' @param x `"tbl_summary"` or `"tbl_svysummary"` object +#' @param x (`gtsummary`)\cr +#' Object with class 'gtsummary' +#' @param ... Passed to other methods. +#' @keywords internal +#' @author Daniel D. Sjoberg +#' @export +#' +#' @seealso [`add_difference.tbl_summary()`] +add_difference <- function(x, ...) { + check_not_missing(x) + check_class(x, "gtsummary") + UseMethod("add_difference") +} + + +#' Add differences between groups +#' +#' Adds difference to tables created by [`tbl_summary()`]. +#' The difference between two groups (typically mean or rate difference) is added +#' to the table along with the difference's confidence interval and a p-value (when applicable). +#' +#' @param x (`tbl_summary`)\cr +#' table created with `tbl_summary()` +#' @param test ([`formula-list-selector`][syntax])\cr +#' Specifies the tests/methods to perform for each variable, e.g. +#' `list(all_continuous() ~ "t.test", all_dichotomous() ~ "prop.test", all_categorical(FALSE) ~ "smd")`. +#' +#' See below for details on default tests and [?tests][tests] for details on available +#' tests and creating custom tests. +#' @param estimate_fun ([`formula-list-selector`][syntax])\cr +#' List of formulas specifying the functions +#' to round and format differences and confidence limits. +#' Default is +#' `list(c(all_continuous(), all_categorical(FALSE)) ~ styfn_sigfig(), all_categorical() ~ \(x) paste0(style_sigfig(x, scale = 100), "%"))` +#' @param conf.level (`numeric`)\cr +#' a scalar in `⁠(0, 1`)⁠ indicating the confidence level. Default is 0.95 #' @inheritParams add_p.tbl_summary -#' @inheritParams tbl_regression -#' @param adj.vars Variables to include in mean difference adjustment (e.g. in ANCOVA models) -#' @param estimate_fun List of formulas specifying the formatting functions -#' to round and format differences. Default is -#' `list(all_continuous() ~ style_sigfig, all_categorical() ~ function(x) paste0(style_sigfig(x * 100), "%"))` -#' Function to round and format difference. Default is [style_sigfig()] -#' @param test List of formulas specifying statistical tests to perform for each variable, -#' e.g. `list(all_continuous() ~ "t.test")`. -#' Common tests include `"t.test"` or `"ancova"` for continuous data, and -#' `"prop.test"` for dichotomous variables. -#' See [tests] for details and more tests. +#' #' @export -#' @seealso Review [list, formula, and selector syntax][syntax] used throughout gtsummary +#' @return a gtsummary table of class `"tbl_summary"` #' -#' @examples -#' \donttest{ +#' @examplesIf gtsummary:::is_pkg_installed("cardx", reference_pkg = "gtsummary") && gtsummary:::is_pkg_installed("broom", reference_pkg = "cardx") #' # Example 1 ---------------------------------- #' add_difference_ex1 <- -#' trial %>% +#' trial |> #' select(trt, age, marker, response, death) %>% #' tbl_summary( #' by = trt, @@ -33,228 +56,162 @@ #' all_dichotomous() ~ "{p}%" #' ), #' missing = "no" -#' ) %>% -#' add_n() %>% +#' ) |> +#' # add_n() |> #' add_difference() #' #' # Example 2 ---------------------------------- #' # ANCOVA adjusted for grade and stage #' add_difference_ex2 <- -#' trial %>% +#' trial |> #' select(trt, age, marker, grade, stage) %>% #' tbl_summary( #' by = trt, #' statistic = list(all_continuous() ~ "{mean} ({sd})"), #' missing = "no", #' include = c(age, marker, trt) -#' ) %>% -#' add_n() %>% +#' ) |> +#' # add_n() |> #' add_difference(adj.vars = c(grade, stage)) -#' } -#' @section Example Output: -#' \if{html}{Example 1} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_difference_ex1.png", width = "60")` -#' }} -#' -#' \if{html}{Example 2} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_difference_ex2.png", width = "60")` -#' }} -add_difference <- function(x, test = NULL, group = NULL, - adj.vars = NULL, test.args = NULL, - conf.level = 0.95, include = everything(), - pvalue_fun = NULL, estimate_fun = NULL) { - # checking inputs ------------------------------------------------------------ - updated_call_list <- c(x$call_list, list(add_difference = match.call())) - .assert_class(x, c("tbl_summary", "tbl_svysummary")) +add_difference.tbl_summary <- function(x, + test = NULL, + group = NULL, + adj.vars = NULL, + test.args = NULL, + conf.level = 0.95, + include = everything(), + pvalue_fun = styfn_pvalue(digits = 1), + estimate_fun = list( + c(all_continuous(), all_categorical(FALSE)) ~ styfn_sigfig(), + all_dichotomous() ~ function(x) ifelse(is.na(x), NA_character_, paste0(style_sigfig(x, scale = 100), "%")), + all_tests("smd") ~ styfn_sigfig() + ), + ...) { + set_cli_abort_call() + # check/process inputs ------------------------------------------------------- + check_dots_empty() + updated_call_list <- c(x$call_list, list(add_p = match.call())) - if (is.null(x$by) || nrow(x$df_by) != 2) { - stop("'tbl_summary'/'tbl_svysummary' object must have a `by=` value with exactly two levels", call. = FALSE) - } - if (any(c("add_p", "add_difference") %in% names(x$call_list))) { - stop("`add_difference()` cannot be run after `add_p()` or `add_difference()`", call. = FALSE) - } if (rlang::is_function(estimate_fun)) { lifecycle::deprecate_stop( "1.4.0", - "gtsummary::add_difference(estimate_fun = 'must be a list of forumulas')", - details = "Argument has been converted to `list(everything() ~ estimate_fun)`" + "gtsummary::add_difference(estimate_fun = 'must be a list of forumulas')" ) } - # expanding formula lists/var selects ---------------------------------------- - include <- - .select_to_varnames( - select = {{ include }}, - data = select(.extract_data_frame(x$inputs$data), any_of(x$meta_data$variable)), - var_info = x$table_body, - arg_name = "include" - ) + # checking that input x has a by var and it has two levels + if (is_empty(x$inputs$by) || dplyr::n_distinct(x$inputs$data[[x$inputs$by]], na.rm = TRUE) != 2L) { + "Cannot run {.fun add_difference} when {.code tbl_summary(by)} column does not have exactly two levels." |> + cli::cli_abort(call = get_cli_abort_call()) + } - test <- - .formula_list_to_named_list( - x = test, - data = select(.extract_data_frame(x$inputs$data), any_of(include)), - var_info = x$table_body, - arg_name = "test", - type_check = chuck(type_check, "is_function_or_string", "fn"), - type_check_msg = chuck(type_check, "is_function_or_string", "msg") - ) + # if `pvalue_fun` not modified, check if we need to use a theme p-value + if (missing(pvalue_fun)) { + pvalue_fun <- + get_theme_element("add_p.tbl_summary-arg:pvalue_fun") %||% + get_theme_element("pkgwide-fn:pvalue_fun") %||% + pvalue_fun + } + pvalue_fun <- as_function(pvalue_fun) - estimate_fun <- - .formula_list_to_named_list( - x = {{ estimate_fun }}, - data = select(.extract_data_frame(x$inputs$data), any_of(x$meta_data$variable)), - var_info = x$table_body, - arg_name = "estimate_fun", - type_check = chuck(type_check, "is_function", "fn"), - type_check_msg = chuck(type_check, "is_function", "msg") - ) - estimate_fun <- - x$meta_data$variable %>% - map( - ~ estimate_fun[[.x]] %||% - switch(x$meta_data[x$meta_data$variable %in% .x, ]$summary_type %in% "dichotomous" && - !identical(test[[.x]], "smd"), - function(x) ifelse(!is.na(x), paste0(style_sigfig(x * 100), "%"), NA_character_) - ) %||% - style_sigfig - ) %>% - set_names(x$meta_data$variable) + cards::process_selectors( + select_prep(x$table_body, x$inputs$data[x$inputs$include]), + include = {{ include }} + ) - adj.vars <- - .select_to_varnames( - select = {{ adj.vars }}, - data = .extract_data_frame(x$inputs$data), - var_info = x$table_body, - arg_name = "adj.vars" - ) + # checking for `tbl_summary(percent = c("cell", "row"))`, which don't apply + if (!x$inputs$percent %in% "column" && + any(unlist(x$inputs$type[include]) %in% c("categorical", "dichotomous"))) { + cli::cli_warn(c( + "The {.code add_difference()} results for categorical variables may not + compatible with {.code tbl_summary(percent = c('cell', 'row'))}.", + i = "Use column percentages instead, {.code tbl_summary(percent = 'column')}." + )) + } - pvalue_fun <- - pvalue_fun %||% - get_theme_element("add_p.tbl_summary-arg:pvalue_fun") %||% - get_theme_element("pkgwide-fn:pvalue_fun") %||% - .get_deprecated_option("gtsummary.pvalue_fun", default = style_pvalue) %>% - gts_mapper("add_p(pvalue_fun=)") + cards::process_selectors(x$inputs$data, group = {{ group }}, adj.vars = {{ adj.vars }}) + check_scalar(group, allow_empty = TRUE) - group <- - .select_to_varnames( - select = {{ group }}, - data = .extract_data_frame(x$inputs$data), - var_info = x$table_body, - arg_name = "group", - select_single = TRUE + cards::process_formula_selectors( + select_prep(x$table_body, x$inputs$data[include]), + test = test, + include_env = TRUE + ) + # add the calling env to the test + test <- .add_env_to_list_elements(test, env = caller_env()) + + # select test ---------------------------------------------------------------- + test <- + assign_tests( + x = x, + test = test, + group = group, + adj.vars = adj.vars, + include = include, + calling_fun = "add_difference" ) - # checking for `tbl_summary(percent = c("cell", "row"))`, which don't apply - if (!identical(x$inputs$percent, "column")) { - bad_percent_vars <- - filter( - x$meta_data, - .data$summary_type %in% c("categorical", "dichotomous"), - .data$variable %in% include - ) %>% - pull("variable") - if (!rlang::is_empty(bad_percent_vars)) { - paste( - "{.code add_difference()} results for categorical variables", - "may not compatible with", - "{.code tbl_summary(percent = c(\"cell\", \"row\"))} options.", - "Use column percentages, {.code tbl_summary(percent = \"column\")}." - ) %>% - stringr::str_wrap() %>% - cli_alert_info() - } - } + # add all available test meta data to a data frame --------------------------- + df_test_meta_data <- + imap( + test, + ~ dplyr::tibble(variable = .y, fun_to_run = list(.x), test_name = attr(.x, "test_name") %||% NA_character_) + ) |> + dplyr::bind_rows() - # caller_env for add_p - caller_env <- rlang::caller_env() + # add test names to `.$table_body` so it can be used in selectors ------------ + if (!"test_name" %in% names(x$table_body)) { + x$table_body <- + dplyr::left_join( + x$table_body, + df_test_meta_data[c("variable", "test_name")], + by = "variable" + ) |> + dplyr::relocate("test_name", .after = "variable") + } else { + x$table_body <- + dplyr::rows_update( + x$table_body, + df_test_meta_data[c("variable", "test_name")], + by = "variable", + unmatched = "ignore" + ) |> + dplyr::relocate("test_name", .after = "variable") + } - # getting the test name and pvalue - meta_data <- - x$meta_data %>% - select("variable", "summary_type") %>% - filter(.data$variable %in% include) %>% - mutate( - test = map2( - .data$variable, .data$summary_type, - function(variable, summary_type) { - .assign_test_add_diff( - data = x$inputs$data, variable = variable, summary_type = summary_type, - by = x$by, group = group, test = test, adj.vars = adj.vars - ) - } - ), - test_info = map( - .data$test, - function(test) { - .get_add_p_test_fun(class(x)[1], - test = test, - env = caller_env, - parent_fun = "add_difference" - ) - } - ), - test_name = map_chr(.data$test_info, ~ pluck(.x, "test_name")) - ) - # adding test_name to table body so it can be used to select vars by the test - x$table_body <- - left_join(x$table_body, meta_data[c("variable", "test_name")], by = "variable") %>% - select("variable", "test_name", everything()) + # now process the `test.args` and `estimate_fun` arguments ------------------- + cards::process_formula_selectors( + select_prep(x$table_body, x$inputs$data[include]), + estimate_fun = estimate_fun + ) + # fill in unspecified variables + cards::fill_formula_selectors( + select_prep(x$table_body, x$inputs$data[include]), + estimate_fun = eval(formals(asNamespace("gtsummary")[["add_difference.tbl_summary"]])[["estimate_fun"]]) + ) - # converting to named list - test.args <- - .formula_list_to_named_list( - x = test.args, - data = select(.extract_data_frame(x$inputs$data), any_of(include)), - var_info = x$table_body, - arg_name = "test.args", - type_check = chuck(type_check, "is_named", "fn"), - type_check_msg = chuck(type_check, "is_named", "msg") + cards::process_formula_selectors( + select_prep(x$table_body, x$inputs$data[include]), + test.args = test.args + ) + cards::check_list_elements( + test.args, + predicate = \(x) is.list(x) && is_named(x), + error_msg = c("Error in the argument {.arg {arg_name}} for variable {.val {variable}}.", + i = "Value must be a named list." ) + ) - x$meta_data <- - meta_data %>% - mutate( - test_result = pmap( - list(.data$test_info, .data$variable, .data$summary_type), - function(test_info, variable, summary_type) { - .run_add_p_test_fun( - x = test_info, data = .env$x$inputs$data, - by = .env$x$by, variable = variable, - group = group, type = summary_type, - test.args = test.args[[variable]], - conf.level = conf.level, tbl = x, - adj.vars = adj.vars - ) - } - ) - ) %>% - select("variable", "test_result") %>% - { - left_join(x$meta_data, ., by = "variable") - } - + # calculate tests ------------------------------------------------------------ x <- - add_p_merge_p_values( - x = x, - lgl_add_p = FALSE, - meta_data = x$meta_data, - pvalue_fun = pvalue_fun, - estimate_fun = estimate_fun, - conf.level = conf.level, - adj.vars = adj.vars + calculate_and_add_test_results( + x = x, include = include, group = group, test.args = test.args, adj.vars = adj.vars, + df_test_meta_data = df_test_meta_data, conf.level = conf.level, + pvalue_fun = pvalue_fun, estimate_fun = estimate_fun, calling_fun = "add_difference" ) - x$call_list <- updated_call_list - # running any additional mods ------------------------------------------------ - x <- - get_theme_element("add_difference-fn:addnl-fn-to-run", default = identity) %>% - do.call(list(x)) + # update call list + x$call_list <- updated_call_list - # return results ------------------------------------------------------------- x } diff --git a/R/add_n.R b/R/add_n.R index 4dc4034fd..c6db2ed24 100644 --- a/R/add_n.R +++ b/R/add_n.R @@ -1,14 +1,18 @@ -#' Adds column with N to gtsummary table +#' Add column with N #' -#' @param x Object created from a gtsummary function -#' @param ... Additional arguments passed to other methods. -#' @author Daniel D. Sjoberg +#' - [`add_n.tbl_summary()`] #' +#' @param x (`gtsummary`)\cr +#' Object with class 'gtsummary' +#' @param ... Passed to other methods. #' @keywords internal -#' @seealso [add_n.tbl_summary()], [add_n.tbl_svysummary()], [add_n.tbl_survfit()], -#' [add_n.tbl_regression], [add_n.tbl_uvregression] +#' @author Daniel D. Sjoberg #' @export +#' +#' @seealso [`add_n.tbl_summary()`] add_n <- function(x, ...) { + check_not_missing(x) + check_class(x, "gtsummary") UseMethod("add_n") } @@ -18,365 +22,177 @@ add_n <- function(x, ...) { #' For each variable in a `tbl_summary` table, the `add_n` function adds a column with the #' total number of non-missing (or missing) observations #' -#' @param x Object with class `tbl_summary` from the [tbl_summary] function or -#' with class `tbl_svysummary` from the [tbl_svysummary] function -#' @param statistic String indicating the statistic to report. Default is the -#' number of non-missing observation for each variable, `statistic = "{n}"`. -#' Other statistics available to report include: -#' * `"{N_obs}"` total number of observations, -#' * `"{N_nonmiss}"` number of non-missing observations, -#' * `"{N_miss}"` number of missing observations, -#' * `"{p_nonmiss}"` percent non-missing data, -#' * `"{p_miss}"` percent missing data -#' * survey summaries also have the following unweighted statistics available: -#' `"N_obs_unweighted"`, `"N_miss_unweighted"`, `"N_nonmiss_unweighted"`, `"p_miss_unweighted"`, `"p_nonmiss_unweighted"` +#' @param x (`tbl_summary`)\cr +#' Object with class `'tbl_summary'` created with [`tbl_summary()`] function. +#' @param statistic (`string`)\cr +#' String indicating the statistic to report. Default is the +#' number of non-missing observation for each variable, `statistic = "{N_nonmiss}"`. +#' All statistics available to report include: +#' +#' * `"{N_obs}"` total number of observations, +#' * `"{N_nonmiss}"` number of non-missing observations, +#' * `"{N_miss}"` number of missing observations, +#' * `"{p_nonmiss}"` percent non-missing data, +#' * `"{p_miss}"` percent missing data +#' +#' The argument uses [`glue::glue()`] syntax and multiple statistics may be reported, +#' e.g. `statistic = "{N_nonmiss} / {N_obs} ({p_nonmiss}%)"` +#' @param col_label (`string`)\cr +#' String indicating the column label. Default is `"**N**"` +#' @param footnote (scalar `logical`)\cr +#' Logical argument indicating whether to print a footnote +#' clarifying the statistics presented. Default is `FALSE` +#' @param last (scalar `logical`)\cr +#' Logical indicator to include N column last in table. +#' Default is `FALSE`, which will display N column first. +#' @inheritParams rlang::args_dots_empty #' -#' The argument uses [glue::glue] syntax and multiple statistics may be reported, -#' e.g. `statistic = "{N_nonmiss} / {N_obs} ({p_nonmiss}%)"` -#' @param col_label String indicating the column label. Default is `"**N**"` -#' @param footnote Logical argument indicating whether to print a footnote -#' clarifying the statistics presented. Default is `FALSE` -#' @param last Logical indicator to include N column last in table. -#' Default is `FALSE`, which will display N column first. -#' @param ... Not used -#' @family tbl_summary tools -#' @family tbl_svysummary tools #' @author Daniel D. Sjoberg #' @export -#' @seealso Review [list, formula, and selector syntax][syntax] used throughout gtsummary -#' @rdname add_n.tbl_summary -#' @return A `tbl_summary` or `tbl_svysummary` object +#' +#' @return A table of class `c('tbl_summary', 'gtsummary')` #' @examples #' # Example 1 ---------------------------------- -#' tbl_n_ex <- -#' trial[c("trt", "age", "grade", "response")] %>% -#' tbl_summary(by = trt) %>% +#' trial |> +#' tbl_summary(by = trt, include = c(trt, age, grade, response)) |> #' add_n() -#' @section Example Output: -#' -#' \if{html}{Example 1} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "tbl_n_ex.png", width = "50")` -#' }} - -add_n.tbl_summary <- function(x, statistic = "{n}", col_label = "**N**", footnote = FALSE, +add_n.tbl_summary <- function(x, statistic = "{N_nonmiss}", col_label = "**N**", footnote = FALSE, last = FALSE, ...) { - check_dots_empty(error = function(e) inform(c(e$message, e$body))) - updated_call_list <- c(x$call_list, list(add_n = match.call())) - # checking that input is class tbl_summary - if (!(inherits(x, "tbl_summary") | inherits(x, "tbl_svysummary"))) { - stop("`x` must be class 'tbl_summary' or 'tbl_svysummary'") - } - - # grabbing summary counts ---------------------------------------------------- - df_stats <- - x$meta_data$df_stats %>% - map_dfr( - function(.x) { - df_stats <- - # remove overall row, if it has been added with `add_overall()` - .purrr_when( - "by" %in% names(.x) ~ filter(.x, !is.na(.data$by)), - TRUE ~ .x - ) %>% - select(any_of(c( - "variable", "by", "N_obs", "N_miss", "N_nonmiss", "p_miss", - "p_nonmiss", "N_obs_unweighted", "N_miss_unweighted", - "N_nonmiss_unweighted", "p_miss_unweighted", - "p_nonmiss_unweighted" - ))) %>% - distinct() %>% - # summing counts within by variable within by levels - dplyr::group_by_at(c("variable", "by") %>% intersect(names(.))) %>% - mutate_at(vars(-any_of(c("variable", "by"))), sum) %>% - select(-any_of("by")) %>% - distinct() - - # correcting percentages ----------------------------------------------- - if ("p_miss" %in% names(df_stats)) { - df_stats <- mutate(df_stats, p_miss = .data$N_miss / .data$N_obs) - } - if ("p_nonmiss" %in% names(df_stats)) { - df_stats <- mutate(df_stats, p_nonmiss = .data$N_nonmiss / .data$N_obs) - } - if ("p_miss_unweighted" %in% names(df_stats)) { - df_stats <- mutate(df_stats, p_miss_unweighted = .data$N_miss_unweighted / .data$N_obs_unweighted) - } - if ("p_nonmiss_unweighted" %in% names(df_stats)) { - df_stats <- mutate(df_stats, p_nonmiss_unweighted = .data$N_nonmiss_unweighted / .data$N_obs_unweighted) - } - - # styling the statistics ----------------------------------------------- - for (v in (names(df_stats) %>% setdiff("variable"))) { - df_stats[[v]] <- df_stats[[v]] %>% attr(.x[[v]], "fmt_fun")() - } - - # returning formatted df ----------------------------------------------- - df_stats %>% - # adding these cols for backwards compatibility (documentation of these names was dropped on 2022-04-03) - mutate( - N = .data$N_obs, - n = .data$N_nonmiss, - n_miss = .data$N_miss, - p = .data$p_nonmiss - ) - } - ) %>% - # making the row that will be merged into table_body ----------------------- - mutate( - statistic = glue(.env$statistic) %>% as.character(), - row_type = "label" - ) %>% - select("variable", "row_type", n = "statistic") - - # merging result with existing tbl_summary ----------------------------------- - x$table_body <- - left_join(x$table_body, df_stats, by = c("variable", "row_type")) - if (last == FALSE) { - x$table_body <- - select(x$table_body, any_of(c("variable", "row_type", "label", "n")), everything()) + set_cli_abort_call() + updated_call_list <- c(x[["call_list"]], list(add_n = match.call())) + + # check inputs --------------------------------------------------------------- + check_string(statistic) + check_string(col_label) + check_scalar_logical(footnote) + check_scalar_logical(last) + + # calculate/grab the needed ARD results -------------------------------------- + if ("add_overall" %in% names(x[["call_list"]])) { + # TODO: If `add_overall()` was previously run, we can get the stats from there instead of re-calculating + } else if (is_empty(x$inputs$by)) { + # TODO: If `tbl_summary(by)` is empty, then we can grab this from `x$card%tbl_summary` + x$cards$add_n <- + x[["cards"]][[1]] |> + dplyr::filter( + .data$variable %in% .env$x$inputs$include, + .data$context %in% "missing" + ) |> + cards::apply_fmt_fn() + } else { + x$cards$add_n <- + cards::ard_missing( + data = x$inputs$data, + variables = x$inputs$include, + by = character(0L), + # TODO: Utilize themes to change the default formatting types + fmt_fn = ~ list( + starts_with("N_") ~ styfn_number(), + starts_with("p_") ~ styfn_percent() + ) + ) |> + cards::apply_fmt_fn() } - # updating table_styling ----------------------------------------------------- - x <- - .update_table_styling(x) %>% - modify_header(n = col_label) - # Adding footnote if requested ----------------------------------------------- - if (footnote == TRUE) { - x <- modify_footnote(x, n ~ stat_to_label(statistic)) + # check statistic argument --------------------------------------------------- + if (is_empty(.extract_glue_elements(statistic))) { + cli::cli_abort( + c("No glue elements found in the {.arg statistic} argument ({.val {statistic}}).", + i = "Do you need to wrap the statistic name in curly brackets, e.g. {.val {{N_nonmiss}}}?" + ), + call = get_cli_abort_call() + ) } - - # fill in the Ns in the header table modify_stat_* columns - x <- .fill_table_header_modify_stats(x) - # adding indicator to output that add_n was run on this data - x$call_list <- updated_call_list - # returning tbl_summary object - x -} - -stat_to_label <- function(x) { - language <- get_theme_element("pkgwide-str:language", default = "en") - df_statistic_names <- - tibble::tribble( - ~stat, ~name, - "{N}", "Total N", - "{n}", "N not Missing", - "{n_miss}", "N Missing", - "{p}%", "% not Missing", - "{p}", "% not Missing", - "{p_miss}%", "% Missing", - "{p_miss}", "% Missing", - "{N_obs}", "Total N", - "{N_miss}", "N Missing", - "{N_nonmiss}", "N not Missing", - "{p_nonmiss}", "% not Missing", - "{N_obs_unweighted}", "Total N (unweighted)", - "{N_miss_unweighted}", "N Missing (unweighted)", - "{N_nonmiss_unweighted}", "N not Missing (unweighted)", - "{p_miss_unweighted}", "% Missing (unweighted)", - "{p_nonmiss_unweighted}", "% not Missing (unweighted)" - ) %>% - mutate(name = map_chr(.data$name, ~ translate_text(.x, language))) - - for (i in seq_len(nrow(df_statistic_names))) { - x <- stringr::str_replace_all( - x, - fixed(df_statistic_names$stat[i]), - fixed(df_statistic_names$name[i]) + if (any(!.extract_glue_elements(statistic) %in% x$cards$add_n$stat_name)) { + missing_stats <- .extract_glue_elements(statistic) |> setdiff(x$cards$add_n$stat_name) + cli::cli_abort( + c("The following statistics are not valid for the {.arg statistic} argument: {.val {missing_stats}}.", + i = "Select from {.val {unique(x$cards$add_n$stat_name)}}." + ), + call = get_cli_abort_call() ) } - x -} - -#' @export -#' @rdname add_n.tbl_summary -add_n.tbl_svysummary <- add_n.tbl_summary - -#' Add column with number of observations -#' -#' \lifecycle{maturing} -#' For each `survfit()` object summarized with `tbl_survfit()` this function -#' will add the total number of observations in a new column. -#' -#' @param x object of class "`tbl_survfit`" -#' @param ... Not used -#' @export -#' @seealso Review [list, formula, and selector syntax][syntax] used throughout gtsummary -#' @family tbl_survfit tools -#' @examplesIf broom.helpers::.assert_package("survival", pkg_search = "gtsummary", boolean = TRUE) -#' \donttest{ -#' library(survival) -#' fit1 <- survfit(Surv(ttdeath, death) ~ 1, trial) -#' fit2 <- survfit(Surv(ttdeath, death) ~ trt, trial) -#' -#' # Example 1 ---------------------------------- -#' add_n.tbl_survfit_ex1 <- -#' list(fit1, fit2) %>% -#' tbl_survfit(times = c(12, 24)) %>% -#' add_n() -#' } -#' @section Example Output: -#' \if{html}{Example 1} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_n.tbl_survfit_ex1.png", width = "64")` -#' }} - -add_n.tbl_survfit <- function(x, ...) { - check_dots_empty(error = function(e) inform(c(e$message, e$body))) - updated_call_list <- c(x$call_list, list(add_n = match.call())) - - # adding N to the table_body ------------------------------------------------- - x$table_body <- - purrr::map2_dfr( - x$meta_data$survfit, - x$meta_data$variable, - function(suvfit, variable) { - # extracting survfit call - survfit_call <- suvfit$call %>% as.list() - # index of formula and data - call_index <- names(survfit_call) %in% c("formula", "data") %>% which() - - # converting call into a survdiff call - model.frame_call <- rlang::call2(rlang::expr(stats::model.frame), !!!survfit_call[call_index], ...) - - # returning number of rows in data frame - tibble( - variable = variable, - row_type = "label", - N = safe_survfit_eval(model.frame_call) %>% nrow() + # prepare ARD data frame ----------------------------------------------------- + cards <- + dplyr::bind_rows( + x$cards$add_n, + x[["cards"]][[1]] |> + dplyr::filter( + .data$variable %in% .env$x$inputs$include, + .data$context %in% "attributes" ) - } + ) |> + dplyr::mutate( + gts_column = ifelse(.data$context %in% "missing", "n", NA_character_) ) %>% - { - left_join( - x$table_body, ., - by = c("variable", "row_type") - ) - } %>% - select(any_of(c("variable", "row_type", "label", "N")), everything()) - - # adding styling data for N column ------------------------------------------- - x <- - modify_table_styling( - x, - columns = "N", - label = "**N**", - fmt_fun = style_number, - hide = FALSE + # adding `'{n}'` to ARD data frame + # Prior to v2.0 release, the default value was `statistic="{n}"` + # Documentation of {n} was removed in v2.0, so we can remove this chunk at some + # point in the future. (May 2024) + dplyr::bind_rows( + dplyr::filter( + ., + .data$context %in% "missing", + .data$stat_name %in% c("N_obs", "N_miss", "N_nonmiss", "p_nonmiss"), + ) |> + dplyr::mutate( + stat_name = + dplyr::case_when( + .data$stat_name %in% "N_nonmiss" ~ "n", + # documentation for the stats below were removed on 2022-04-03 + .data$stat_name %in% "N_obs" ~ "N", + .data$stat_name %in% "N_miss" ~ "n_miss", + .data$stat_name %in% "p_nonmiss" ~ "p" + ) + ) ) - # fill in the Ns in the header table modify_stat_* columns - x <- .fill_table_header_modify_stats(x) - # adding indicator to output that add_n was run on this data - x$call_list <- updated_call_list - x -} - -#' Add N to regression table -#' -#' @param x a `tbl_regression` or `tbl_uvregression` table -#' @param location location to place Ns. When `"label"` total Ns are placed -#' on each variable's label row. When `"level"` level counts are placed on the -#' variable level for categorical variables, and total N on the variable's label -#' row for continuous. -#' @param ... Not used -#' -#' @name add_n_regression -#' @examples -#' \donttest{ -#' # Example 1 ---------------------------------- -#' add_n.tbl_regression_ex1 <- -#' trial %>% -#' select(response, age, grade) %>% -#' tbl_uvregression( -#' y = response, -#' method = glm, -#' method.args = list(family = binomial), -#' hide_n = TRUE -#' ) %>% -#' add_n(location = "label") -#' -#' # Example 2 ---------------------------------- -#' add_n.tbl_regression_ex2 <- -#' glm(response ~ age + grade, trial, family = binomial) %>% -#' tbl_regression(exponentiate = TRUE) %>% -#' add_n(location = "level") -#' } -#' @section Example Output: -#' \if{html}{Example 1} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_n.tbl_regression_ex1.png", width = "64")` -#' }} -#' -#' \if{html}{Example 2} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_n.tbl_regression_ex2.png", width = "64")` -#' }} -NULL + # create tibble to merge with primary table ---------------------------------- + df_results <- + pier_summary_dichotomous( + cards = cards, + variables = x$inputs$include, + statistic = rep_named(x$inputs$include, x = list(statistic)) + ) |> + dplyr::select("variable", "row_type", "n") + + # add results to primary table ----------------------------------------------- + x <- x |> + modify_table_body( + \(table_body) { + table_body <- + dplyr::left_join( + table_body, + df_results, + by = c("variable", "row_type") + ) -#' @rdname add_n_regression -#' @export -add_n.tbl_regression <- function(x, location = NULL, ...) { - check_dots_empty(error = function(e) inform(c(e$message, e$body))) - updated_call_list <- c(x$call_list, list(add_n = match.call())) - location <- match.arg(location, choices = c("label", "level"), several.ok = TRUE) + if (isFALSE(last)) { + table_body <- dplyr::relocate(table_body, "n", .after = "label") + } + table_body + } + ) |> + modify_header(n = col_label) - if ("level" %in% location && !"n_obs" %in% x$table_styling$header$column) { - abort("Reporting N on level rows is not available for this model type.") - } - if ("label" %in% location && !"N_obs" %in% x$table_styling$header$column) { - abort("Reporting N on label rows is not available for this model type.") - } + # add footnote if requested by user ------------------------------------------ + if (footnote) { + footnote_text <- + eval_tidy( + expr = expr(glue(statistic)), + data = + x$cards$add_n |> + dplyr::slice(.by = "stat_name", 1L) |> + cards::get_ard_statistics(.column = "stat_label") + ) - x$table_body$stat_n <- NA_integer_ - if ("N_obs" %in% names(x$table_body)) { - x$table_body$stat_n <- ifelse(x$table_body$row_type == "label", - x$table_body$N_obs %>% as.integer(), - x$table_body$stat_n - ) + x <- modify_table_styling(x, columns = "n", footnote = footnote_text) } - if ("n_obs" %in% names(x$table_body)) { - x$table_body$stat_n <- ifelse(x$table_body$row_type == "level", - x$table_body$n_obs %>% as.integer(), - x$table_body$stat_n - ) - } - x <- - x %>% - modify_table_body( - mutate, - stat_n = - case_when( - !"level" %in% .env$location & .data$row_type %in% "level" ~ NA_integer_, - !"label" %in% .env$location & .data$row_type %in% "label" & - .data$var_type %in% c("categorical", "dichotomous") ~ NA_integer_, - TRUE ~ .data$stat_n - ) - ) %>% - modify_table_body( - dplyr::relocate, - "stat_n", - .after = "label" - ) %>% - modify_table_styling( - columns = all_of("stat_n"), - label = "**N**", - hide = FALSE, - fmt_fun = style_number - ) - # fill in the Ns in the header table modify_stat_* columns - x <- .fill_table_header_modify_stats(x) + # return final table --------------------------------------------------------- + # add updated call x$call_list <- updated_call_list x } - -#' @export -#' @rdname add_n_regression -add_n.tbl_uvregression <- add_n.tbl_regression diff --git a/R/add_n.tbl_regression.R b/R/add_n.tbl_regression.R new file mode 100644 index 000000000..99dadfbe3 --- /dev/null +++ b/R/add_n.tbl_regression.R @@ -0,0 +1,96 @@ +#' Add N to regression table +#' +#' @param x (`tbl_regression`\`tbl_uvregression`)\cr +#' a `tbl_regression` or `tbl_uvregression` table +#' @param location (`character`)\cr +#' location to place Ns. Select one or more of `c('label', 'level')`. +#' Default is `'label'`. +#' +#' When `"label"` total Ns are placed +#' on each variable's label row. When `"level"` level counts are placed on the +#' variable level for categorical variables, and total N on the variable's label +#' row for continuous. +#' @inheritParams rlang::args_dots_empty +#' +#' @name add_n.tbl_regression +#' @examples +#' # TODO: Re-add after tbl_uvregression() is added +#' # # Example 1 ---------------------------------- +#' # add_n.tbl_regression_ex1 <- +#' # trial %>% +#' # select(response, age, grade) %>% +#' # tbl_uvregression( +#' # y = response, +#' # method = glm, +#' # method.args = list(family = binomial), +#' # hide_n = TRUE +#' # ) %>% +#' # add_n(location = "label") +#' +#' # Example 2 ---------------------------------- +#' glm(response ~ age + grade, trial, family = binomial) %>% +#' tbl_regression(exponentiate = TRUE) %>% +#' add_n(location = "level") +NULL + +#' @rdname add_n.tbl_regression +#' @export +add_n.tbl_regression <- function(x, location = "label", ...) { + set_cli_abort_call() + check_dots_empty() + updated_call_list <- c(x$call_list, list(add_nevent = match.call())) + + # process inputs ------------------------------------------------------------- + location <- arg_match(location, values = c("label", "level"), multiple = TRUE) + + if ("level" %in% location && !"n_obs" %in% x$table_styling$header$column) { + cli::cli_abort( + "Reporting N on level rows is not available for this model type.", + call = get_cli_abort_call() + ) + } + if ("label" %in% location && !"N_obs" %in% x$table_styling$header$column) { + cli::cli_abort( + "Reporting N on label rows is not available for this model type.", + call = get_cli_abort_call() + ) + } + + x$table_body$stat_n <- NA_integer_ + if ("N_obs" %in% names(x$table_body)) { + x$table_body$stat_n <- ifelse(x$table_body$row_type == "label", + x$table_body$N_obs %>% as.integer(), + x$table_body$stat_n + ) + } + if ("n_obs" %in% names(x$table_body)) { + x$table_body$stat_n <- ifelse(x$table_body$row_type == "level", + x$table_body$n_obs %>% as.integer(), + x$table_body$stat_n + ) + } + x <- + x |> + modify_table_body( + mutate, + stat_n = + dplyr::case_when( + !"level" %in% .env$location & .data$row_type %in% "level" ~ NA_integer_, + !"label" %in% .env$location & .data$row_type %in% "label" & + .data$var_type %in% c("categorical", "dichotomous") ~ NA_integer_, + TRUE ~ .data$stat_n + ) + ) |> + modify_table_body(dplyr::relocate, "stat_n", .after = "label") |> + modify_table_styling( + columns = all_of("stat_n"), + label = "**N**", + hide = FALSE, + fmt_fun = styfn_number() + ) + + # fill in the Ns in the header table modify_stat_* columns + x <- .fill_table_header_modify_stats(x) + x$call_list <- updated_call_list + x +} diff --git a/R/add_nevent.R b/R/add_nevent.R index 8f738ac83..a673ac07a 100644 --- a/R/add_nevent.R +++ b/R/add_nevent.R @@ -1,104 +1,78 @@ -#' Add number of events to a regression table +#' Add p-values #' -#' Adds a column of the number of events to tables created with -#' [tbl_regression] or [tbl_uvregression]. Supported -#' model types are among GLMs with binomial distribution family (e.g. -#' [stats::glm], `lme4::glmer`, and -#' `geepack::geeglm`) and Cox -#' Proportion Hazards regression models ([survival::coxph]). +#' - [`add_nevent.tbl_regression()`] #' -#' @param x `tbl_regression` or `tbl_uvregression` object -#' @param ... Additional arguments passed to or from other methods. +#' @param x (`gtsummary`)\cr +#' Object with class 'gtsummary' +#' @param ... Passed to other methods. #' @keywords internal -#' @export #' @author Daniel D. Sjoberg -#' @seealso [add_nevent.tbl_regression], [add_nevent.tbl_uvregression], -#' [add_nevent.tbl_survfit] - -add_nevent <- function(x, ...) UseMethod("add_nevent") - -#' Add event N to regression table -#' -#' @inheritParams add_n_regression -#' @name add_nevent_regression -#' -#' @examples -#' \donttest{ -#' # Example 1 ---------------------------------- -#' add_nevent.tbl_regression_ex1 <- -#' trial %>% -#' select(response, trt, grade) %>% -#' tbl_uvregression( -#' y = response, -#' method = glm, -#' method.args = list(family = binomial), -#' ) %>% -#' add_nevent() -#' # Example 2 ---------------------------------- -#' add_nevent.tbl_regression_ex2 <- -#' glm(response ~ age + grade, trial, family = binomial) %>% -#' tbl_regression(exponentiate = TRUE) %>% -#' add_nevent(location = "level") -#' } -#' @section Example Output: -#' \if{html}{Example 1} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_nevent.tbl_regression_ex1.png", width = "64")` -#' }} -#' -#' \if{html}{Example 2} +#' @export #' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_nevent.tbl_regression_ex2.png", width = "64")` -#' }} -NULL +#' @seealso [`add_p.tbl_summary()`] +add_nevent <- function(x, ...) { + check_not_missing(x) + check_class(x, "gtsummary") + UseMethod("add_nevent") +} + -#' @rdname add_nevent_regression #' @export -add_nevent.tbl_regression <- function(x, location = NULL, ...) { - check_dots_empty(error = function(e) inform(c(e$message, e$body))) +#' @rdname add_nevent +add_nevent.tbl_regression <- function(x, location = "label", ...) { + set_cli_abort_call() + check_dots_empty() updated_call_list <- c(x$call_list, list(add_nevent = match.call())) - location <- match.arg(location, choices = c("label", "level"), several.ok = TRUE) + + # process inputs ------------------------------------------------------------- + location <- arg_match(location, values = c("label", "level"), multiple = TRUE) if ("level" %in% location && !"n_event" %in% x$table_styling$header$column) { - abort("Reporting event N on level rows is not available for this model type.") + cli::cli_abort( + "Reporting event N on level rows is not available for this model type.", + call = get_cli_abort_call() + ) } if ("label" %in% location && !"N_event" %in% x$table_styling$header$column) { - abort("Reporting event N on label rows is not available for this model type.") + cli::cli_abort( + "Reporting event N on label rows is not available for this model type.", + call = get_cli_abort_call() + ) } x$table_body$stat_nevent <- NA_integer_ if ("N_event" %in% names(x$table_body)) { x$table_body$stat_nevent <- ifelse(x$table_body$row_type == "label", - x$table_body$N_event %>% as.integer(), - x$table_body$stat_nevent + x$table_body$N_event %>% as.integer(), + x$table_body$stat_nevent ) } if ("n_event" %in% names(x$table_body)) { x$table_body$stat_nevent <- ifelse(x$table_body$row_type == "level", - x$table_body$n_event %>% as.integer(), - x$table_body$stat_nevent + x$table_body$n_event %>% as.integer(), + x$table_body$stat_nevent ) } + x <- - x %>% modify_table_body( - mutate, + x, + dplyr::mutate, stat_nevent = - case_when( + dplyr::case_when( !"level" %in% .env$location & .data$row_type %in% "level" ~ NA_integer_, !"label" %in% .env$location & .data$row_type %in% "label" & .data$var_type %in% c("categorical", "dichotomous") ~ NA_integer_, TRUE ~ .data$stat_nevent ) - ) %>% - modify_table_body( - dplyr::relocate, - "stat_nevent", - .before = "estimate" - ) %>% - modify_header(stat_nevent ~ "**Event N**") + ) |> + modify_table_body(dplyr::relocate, "stat_nevent", .before = "estimate") |> + modify_table_styling( + columns = all_of("stat_nevent"), + label = "**Event N**", + hide = FALSE, + fmt_fun = styfn_number() + ) # fill in the Ns in the header table modify_stat_* columns x <- .fill_table_header_modify_stats(x) @@ -107,85 +81,20 @@ add_nevent.tbl_regression <- function(x, location = NULL, ...) { x } -#' @export -#' @rdname add_nevent_regression -add_nevent.tbl_uvregression <- add_nevent.tbl_regression - -#' Add column with number of observed events -#' -#' \lifecycle{maturing} -#' For each `survfit()` object summarized with `tbl_survfit()` this function -#' will add the total number of events observed in a new column. -#' -#' @param x object of class 'tbl_survfit' -#' @param ... Not used -#' @export -#' @family tbl_survfit tools -#' @examplesIf broom.helpers::.assert_package("survival", pkg_search = "gtsummary", boolean = TRUE) -#' \donttest{ -#' library(survival) -#' fit1 <- survfit(Surv(ttdeath, death) ~ 1, trial) -#' fit2 <- survfit(Surv(ttdeath, death) ~ trt, trial) -#' -#' # Example 1 ---------------------------------- -#' add_nevent.tbl_survfit_ex1 <- -#' list(fit1, fit2) %>% -#' tbl_survfit(times = c(12, 24)) %>% -#' add_n() %>% -#' add_nevent() -#' } -#' @section Example Output: -#' \if{html}{Example 1} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_nevent.tbl_survfit_ex1.png", width = "64")` -#' }} - -add_nevent.tbl_survfit <- function(x, ...) { - check_dots_empty(error = function(e) inform(c(e$message, e$body))) - updated_call_list <- c(x$call_list, list(add_nevent = match.call())) - - # checking survfit is a standard (not multi-state) - if (!purrr::every(x$meta_data$survfit, ~ rlang::is_empty(setdiff(class(.x), c("survfit", "survfit2"))))) { - paste( - "Each of the `survfit()` objects must have class 'survfit' only.", - "Multi-state models are not supported by this function." - ) %>% - stringr::str_wrap() %>% - stop(call. = FALSE) - } - - # calculating event N -------------------------------------------------------- - x$table_body <- - purrr::map2_dfr( - x$meta_data$survfit, x$meta_data$variable, - ~ tibble( - nevent = broom::tidy(.x) %>% pull("n.event") %>% sum(), - variable = .y, - row_type = "label" - ) - ) %>% - { - left_join( - x$table_body, ., - by = c("variable", "row_type") - ) - } %>% - select(any_of(c("variable", "row_type", "label", "N", "nevent")), everything()) +# this function is used to fill in missing values in the +# x$table_styling$header$modify_stat_* columns +.fill_table_header_modify_stats <- function(x, + modify_stats = + c("modify_stat_N", "modify_stat_N_event", + "modify_stat_N_unweighted")) { + modify_stats <- + x$table_styling$header |> + select(any_of(modify_stats) & where(\(x) dplyr::n_distinct(x, na.rm = TRUE) == 1L)) %>% + names() - # adding N to table_styling and assigning header label ----------------------- - x <- - modify_table_styling( - x, - columns = "nevent", - label = "**Event N**", - fmt_fun = style_number, - hide = FALSE - ) + x$table_styling$header <- + x$table_styling$header %>% + tidyr::fill(any_of(modify_stats), .direction = "downup") - # fill in the Ns in the header table modify_stat_* columns - x <- .fill_table_header_modify_stats(x) - # adding indicator to output that add_n was run on this data - x$call_list <- updated_call_list - x + return(x) } diff --git a/R/add_overall.R b/R/add_overall.R index 1d91106ef..8ec591190 100644 --- a/R/add_overall.R +++ b/R/add_overall.R @@ -1,258 +1,175 @@ -#' Add column with overall summary statistics +#' Add overall column +#' +#' - [`add_overall.tbl_summary()`] +#' +#' @param x (`gtsummary`)\cr +#' Object with class 'gtsummary' +#' @param ... Passed to other methods. +#' @keywords internal +#' @author Daniel D. Sjoberg +#' @export +#' +#' @seealso [`add_overall.tbl_summary()`] +add_overall <- function(x, ...) { + check_not_missing(x) + check_class(x, "gtsummary") + UseMethod("add_overall") +} + +#' Add overall column #' #' Adds a column with overall summary statistics to tables #' created by `tbl_summary`, `tbl_svysummary`, `tbl_continuous` or #' `tbl_custom_summary`. #' -#' @param x Object with class `tbl_summary` from the [tbl_summary] function, -#' object with class `tbl_svysummary` from the [tbl_svysummary] function, -#' object with class `tbl_continuous` from the [tbl_continuous] function or -#' object with class `tbl_custom_summary` from the [tbl_custom_summary] function. +#' @param x (`tbl_summary`\`tbl_svysummary`\`tbl_continuous`\`tbl_custom_summary`)\cr +#' A stratified 'gtsummary' table #' @param last Logical indicator to display overall column last in table. #' Default is `FALSE`, which will display overall column first. -#' @param col_label String indicating the column label. Default is `"**Overall**, N = {N}"` +#' @param col_label String indicating the column label. Default is `"**Overall** \nN = {N}"` #' @param statistic Override the statistic argument in initial `tbl_*` function. #' call. Default is `NULL`. #' @param digits Override the digits argument in initial `tbl_*` function #' call. Default is `NULL`. -#' @param ... Not used -#' @family tbl_summary tools -#' @family tbl_svysummary tools -#' @family tbl_continuous tools -#' @family tbl_custom_summary tools +#' @inheritParams rlang::args_dots_empty +#' #' @author Daniel D. Sjoberg #' @export -#' @return A `tbl_*` of same class as `x` +#' @return A `gtsummary` of same class as `x` #' @examples -#' \donttest{ #' # Example 1 ---------------------------------- -#' tbl_overall_ex1 <- -#' trial %>% -#' tbl_summary(include = c(age, grade), by = trt) %>% +#' trial |> +#' tbl_summary(include = c(age, grade), by = trt) |> #' add_overall() #' #' # Example 2 ---------------------------------- -#' tbl_overall_ex2 <- -#' trial %>% +#' trial |> #' tbl_summary( #' include = grade, #' by = trt, #' percent = "row", #' statistic = ~"{p}%", #' digits = ~1 -#' ) %>% +#' ) |> #' add_overall( #' last = TRUE, #' statistic = ~"{p}% (n={n})", #' digits = ~ c(1, 0) #' ) #' -#' # Example 3 ---------------------------------- -#' tbl_overall_ex3 <- -#' trial %>% -#' tbl_continuous( -#' variable = age, -#' by = trt, -#' include = grade -#' ) %>% -#' add_overall(last = TRUE) -#' } -#' @section Example Output: -#' \if{html}{Example 1} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "tbl_overall_ex1.png", width = "55")` -#' }} -#' -#' \if{html}{Example 2} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "tbl_overall_ex2.png", width = "55")` -#' }} -#' -#' \if{html}{Example 3} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "tbl_overall_ex3.png", width = "55")` -#' }} -add_overall <- function(x, ...) { - UseMethod("add_overall") -} - -#' @rdname add_overall -#' @export -add_overall.tbl_summary <- function(x, last = FALSE, col_label = NULL, +#' # TODO: Add this example after `tbl_continuous()` +#' # # Example 3 ---------------------------------- +#' # tbl_overall_ex3 <- +#' # trial %>% +#' # tbl_continuous( +#' # variable = age, +#' # by = trt, +#' # include = grade +#' # ) %>% +#' # add_overall(last = TRUE) +add_overall.tbl_summary <- function(x, last = FALSE, col_label = "**Overall** \nN = {N}", statistic = NULL, digits = NULL, ...) { - check_dots_empty(error = function(e) inform(c(e$message, e$body))) + set_cli_abort_call() + check_dots_empty() + + # translating the col_label, if nothing passed by user + if (missing(col_label)) { + paste0("**", translate_text("Overall"), "** \nN = {N}") + } + add_overall_generic( - x = x, last = last, col_label = col_label, - statistic = statistic, digits = digits, - call = c(x$call_list, list(add_overall = match.call())) + x = x, + last = last, + col_label = col_label, + statistic = statistic, + digits = digits, + call = c(x$call_list, list(add_overall = match.call())), + calling_fun = "tbl_summary" ) } +add_overall_generic <- function(x, last, col_label, statistic, digits, call, calling_fun) { + check_scalar_logical(last) + check_string(col_label, allow_empty = TRUE) -#' @rdname add_overall -#' @export -add_overall.tbl_svysummary <- add_overall.tbl_summary - -#' @rdname add_overall -#' @export -add_overall.tbl_continuous <- add_overall.tbl_summary - - -#' @rdname add_overall -#' @export -add_overall.tbl_custom_summary <- add_overall.tbl_summary - - -add_overall_generic <- function(x, last, col_label, statistic, digits, call) { # checking that input x has a by var - if (is.null(x$inputs[["by"]])) { - paste( - "Cannot add Overall column when no `by=` variable in", - "original summary table call." - ) %>% - stop(call. = FALSE) + if (is_empty(x$inputs[["by"]])) { + cli::cli_abort( + "Cannot run {.fun add_overall} when original table function is not statified with {.code {calling_fun}(by)}.", + call = get_cli_abort_call() + ) } - x_copy <- x - - # removing 'by' variable from data - # (so it won't show up in the overall tbl_summary) - # x_copy$inputs[["data"]] <- - # select(.extract_data_frame(x$inputs[["data"]]), -x[["by"]]) - x_copy$inputs$include <- x_copy$inputs$include %>% setdiff(x$inputs$by) + # save arguments to pass to original function without `by` stratified -------- + args_overall <- x$inputs |> + utils::modifyList(list(by = NULL), keep.null = TRUE) # if overall row, already included in data ----------------------------------- - if (isTRUE(x$inputs$overall_row)) { - x_copy$inputs$overall_row <- FALSE + if (isTRUE(args_overall$overall_row)) { + args_overall$overall_row <- FALSE } - # evaluate statistic and digits args ----------------------------------------- - statistic <- - .formula_list_to_named_list( - x = statistic, - data = .extract_data_frame(x_copy$inputs$data), - var_info = x_copy$table_body, - arg_name = "statistic", - type_check = chuck(type_check, "is_character", "fn"), - type_check_msg = chuck(type_check, "is_character", "msg") - ) - x_copy$inputs$statistic <- - .formula_list_to_named_list( - x = x_copy$inputs$statistic, - data = .extract_data_frame(x_copy$inputs$data), - var_info = x_copy$table_body, - arg_name = "statistic" - ) - digits <- - .formula_list_to_named_list( - x = digits, - data = .extract_data_frame(x_copy$inputs$data), - var_info = x_copy$table_body, - arg_name = "digits", - type_check = chuck(type_check, "digits", "fn"), - type_check_msg = chuck(type_check, "digits", "msg") - ) - x_copy$inputs$digits <- - .formula_list_to_named_list( - x = x_copy$inputs$digits, - data = .extract_data_frame(x_copy$inputs$data), - var_info = x_copy$table_body, - arg_name = "digits" - ) - # if user passed updates statistics or digits, update the calls - if (!is.null(statistic)) { - x_copy$inputs$statistic <- - switch(is.null(x_copy$inputs$statistic), - statistic - ) %||% - purrr::list_modify(x_copy$inputs$statistic, !!!statistic) + # update statistic/digit argument as needed ---------------------------------- + if (!is_empty(statistic)) { + args_overall$statistic <- statistic } - if (!is.null(digits)) { - x_copy$inputs$digits <- - switch(is.null(x_copy$inputs$digits), - digits - ) %||% - purrr::list_modify(x_copy$inputs$digits, !!!digits) + if (!is_empty(digits)) { + args_overall$digits <- digits } - # replacing the function call by variable to NULL to get results overall - x_copy$inputs[["by"]] <- NULL - - # calculating stats overall, and adding header row - tbl_overall <- do.call(class(x)[1], x_copy$inputs) + # create overall table ------------------------------------------------------- + tbl_overall <- do.call(calling_fun, args_overall) # merging overall results - x <- add_overall_merge(x, tbl_overall, last, col_label) + x <- add_overall_merge(x, tbl_overall, last, col_label, calling_fun) x$call_list <- call x } - -add_overall_merge <- function(x, tbl_overall, last, col_label) { - # extracting table body from overall table - overall <- - tbl_overall %>% - pluck("table_body") - +add_overall_merge <- function(x, tbl_overall, last, col_label, calling_fun) { # checking the original tbl_summary and the added overall, # are the same before binding (excluding headers) if (!identical( select(x$table_body, c("row_type", "variable", "label")), - select(overall, c("row_type", "variable", "label")) %>% as_tibble() + select(tbl_overall$table_body, c("row_type", "variable", "label")) |> as_tibble() )) { - paste( - "An error occured in `add_overall()`, and overall statistics cannot be merged.", - "Has the variable label changed since the original call of `tbl_summary()`?" - ) %>% - stringr::str_wrap() %>% - stop(call. = FALSE) + cli::cli_abort( + c( + "An error occured in {.fun add_overall}, and the overall statistic cannot be added.", + "Have variable labels changed since the original call to {.fun {calling_fun}}?" + ), + call = get_cli_abort_call() + ) } - # adding the stat_0 row to to the df_stats tibbles - x$meta_data$df_stats <- - x$meta_data$variable %>% - map( - function(.x) { - bind_rows( - x$meta_data$df_stats[[which(x$meta_data$variable %in% .x)]], - tbl_overall$meta_data$df_stats[[which(tbl_overall$meta_data$variable %in% .x)]] - ) %>% - purrr::imap_dfc( - function(vec, colname) { - attributes(vec) <- - attributes(x$meta_data$df_stats[[which(x$meta_data$variable %in% .x)]][[colname]]) - vec - } - ) - } - ) + # adding the overall cards object to the output + x[["cards"]][["add_overall"]] <- tbl_overall[["cards"]][[1]] # adding overall stat to the table_body data frame - x <- - x %>% - modify_table_body(~ bind_cols(.x, overall %>% select(c("stat_0")))) + x$table_body <- + dplyr::bind_cols( + x$table_body, + tbl_overall$table_body |> dplyr::select("stat_0") + ) - # fill in the Ns in the header table modify_stat_* columns + # add the overall header row to the primary table x$table_styling$header <- - x$table_styling$header %>% + dplyr::bind_rows( + x$table_styling$header, + tbl_overall$table_styling$header |> + dplyr::filter(.data$column %in% "stat_0") + ) + + x$table_styling$header %>% dplyr::rows_update( tbl_overall$table_styling$header %>% dplyr::filter(.data$column %in% "stat_0"), by = "column" ) - x <- .fill_table_header_modify_stats(x) if (last == FALSE) { - x <- x %>% - modify_table_body( - dplyr::relocate, - "stat_0", - .before = "stat_1" - ) + x <- modify_table_body(x, dplyr::relocate, "stat_0", .before = "stat_1") } # updating table_style with footnote and column header @@ -263,26 +180,9 @@ add_overall_merge <- function(x, tbl_overall, last, col_label) { dplyr::filter(.data$column %in% "stat_0") ) - # use defult header for new column - if (is.null(col_label)) { - x$table_styling$header <- - x$table_styling$header %>% - mutate( - label = - ifelse( - .data$column %in% "stat_0", - paste0( - "**", translate_text("Overall"), "**, ", - stringr::str_remove_all(.data$label, pattern = stringr::fixed("**")) - ), - .data$label - ) - ) - } - # use user-specified label - else { - x <- modify_header(x, stat_0 = col_label) - } + # Add + x <- modify_header(x, stat_0 = col_label) + x diff --git a/R/add_p.R b/R/add_p.R index f6286f284..fbe8eb7b9 100644 --- a/R/add_p.R +++ b/R/add_p.R @@ -1,989 +1,477 @@ -#' Adds p-values to gtsummary table +#' Add p-values #' -#' @param x Object created from a gtsummary function -#' @param ... Additional arguments passed to other methods. +#' - [`add_p.tbl_summary()`] +#' +#' @param x (`gtsummary`)\cr +#' Object with class 'gtsummary' +#' @param ... Passed to other methods. #' @keywords internal #' @author Daniel D. Sjoberg -#' @seealso [add_p.tbl_summary], [add_p.tbl_cross], [add_p.tbl_svysummary], [add_p.tbl_survfit], [add_p.tbl_continuous] #' @export +#' +#' @seealso [`add_p.tbl_summary()`] add_p <- function(x, ...) { + check_not_missing(x) + check_class(x, "gtsummary") UseMethod("add_p") } -#' Adds p-values to summary tables +#' Add p-values to summary table +#' +#' Adds p-values to tables created by [`tbl_summary()`] by comparing values across groups. +#' +#' @param x (`tbl_summary`)\cr +#' table created with `tbl_summary()` +#' @param test ([`formula-list-selector`][syntax])\cr +#' Specifies the statistical tests to perform for each variable, e.g. +#' `list(all_continuous() ~ "t.test", all_categorical() ~ "fisher.test")`. +#' +#' See below for details on default tests and [?tests][tests] for details on available +#' tests and creating custom tests. +#' @param pvalue_fun (`function`)\cr +#' Function to round and format p-values. Default is `styfn_pvalue()`. +#' The function must have a numeric vector input, and return a string that is +#' the rounded/formatted p-value (e.g. `pvalue_fun = styfn_pvalue(digits = 2)`). +#' @param group ([`tidy-select`][dplyr::dplyr_tidy_select])\cr +#' Variable name of an ID or grouping variable. The column can be used to +#' calculate p-values with correlated data. +#' Default is `NULL`. See [tests] for methods that utilize the `group` argument. +#' @param include ([`tidy-select`][dplyr::dplyr_tidy_select])\cr +#' Variables to include in output. Default is `everything()`. +#' @param test.args ([`formula-list-selector`][syntax])\cr +#' Containing additional arguments to pass to tests that accept arguments. +#' For example, add an argument for all t-tests, use +#' `test.args = all_tests("t.test") ~ list(var.equal = TRUE)`. +#' @param adj.vars ([`tidy-select`][dplyr::dplyr_tidy_select])\cr +#' Variables to include in adjusted calculations (e.g. in ANCOVA models). +#' Default is `NULL`. +#' @param ... Not used +#' +#' @return a gtsummary table of class `"tbl_summary"` +#' @export #' -#' Adds p-values to tables created by `tbl_summary` by comparing values across groups. +#' @section test argument: #' -#' @param x Object with class `tbl_summary` from the [tbl_summary] function -#' @param test List of formulas specifying statistical tests to perform for each -#' variable, -#' e.g. \code{list(all_continuous() ~ "t.test", all_categorical() ~ "fisher.test")}. -#' Common tests include `"t.test"`, `"aov"`, `"wilcox.test"`, `"kruskal.test"`, -#' `"chisq.test"`, `"fisher.test"`, and `"lme4"` (for clustered data). See [tests] -#' for details, more tests, and instruction for implementing a custom test. +#' See the [?tests][tests] help file for details on available tests and creating custom tests. +#' The [?tests][tests] help file also includes psuedo-code for each test to be clear +#' precisely how the calculation is performed. #' -#' Tests default to `"kruskal.test"` for continuous variables (`"wilcox.test"` -#' when "`by`" variable has two levels), `"chisq.test.no.correct"` for -#' categorical variables with all expected cell counts >=5, and `"fisher.test"` -#' for categorical variables with any expected cell count <5. -#' @param group Column name (unquoted or quoted) of an ID or grouping variable. -#' The column can be used to calculate p-values with correlated data. -#' Default is `NULL`. See [tests] for methods that utilize the `group=` argument. -#' @param test.args List of formulas containing additional arguments to pass to -#' tests that accept arguments. For example, add an argument for all t-tests, -#' use `test.args = all_tests("t.test") ~ list(var.equal = TRUE)` -#' @param ... Not used -#' @inheritParams tbl_regression -#' @inheritParams tbl_summary -#' @family tbl_summary tools -#' @seealso See tbl_summary \href{https://www.danieldsjoberg.com/gtsummary/articles/tbl_summary.html}{vignette} for detailed examples -#' @seealso Review [list, formula, and selector syntax][syntax] used throughout gtsummary -#' @export -#' @return A `tbl_summary` object -#' @author Daniel D. Sjoberg, Emily C. Zabor -#' @examples -#' \donttest{ +#' The default test used in `add_p()` primarily depends on these factors: +#' - whether the variable is categorical/dichotomous vs continuous +#' - number of levels in the `tbl_summary(by)` variable +#' - whether the `add_p(group)` argument is specified +#' - whether the `add_p(adj.vars)` argument is specified +#' +#' #### Specified neither `add_p(group)` nor `add_p(adj.vars)` +#' +#' - `"wilcox.test"` when `by` variable has two levels and variable is continuous. +#' - `"krustkal.test"` when `by` variable has more than two levels and variable is continuous. +#' - `"chisq.test.no.correct"` for categorical variables with all expected cell counts >=5, +#' and `"fisher.test"` for categorical variables with any expected cell count <5. +#' +#' #### Specified `add_p(group)` and not `add_p(adj.vars)` +#' +#' - `"lme4"` when `by` variable has two levels for all summary types. +#' +#' *There is no default for grouped data when `by` variable has more than two levels.* +#' *Users must create custom tests for this scenario.* +#' +#' #### Specified `add_p(adj.vars)` and not `add_p(group)` +#' +#' - `"ancova"` when variable is continuous and `by` variable has two levels. +#' +#' @examplesIf gtsummary:::is_pkg_installed("cardx", reference_pkg = "gtsummary") && gtsummary:::is_pkg_installed("broom", reference_pkg = "cardx") #' # Example 1 ---------------------------------- #' add_p_ex1 <- -#' trial[c("age", "grade", "trt")] %>% -#' tbl_summary(by = trt) %>% +#' trial |> +#' tbl_summary(by = trt, include = c(age, grade)) |> #' add_p() #' #' # Example 2 ---------------------------------- #' add_p_ex2 <- #' trial %>% -#' select(trt, age, marker) %>% -#' tbl_summary(by = trt, missing = "no") %>% +#' select(trt, age, marker) |> +#' tbl_summary(by = trt, missing = "no") |> #' add_p( #' # perform t-test for all variables #' test = everything() ~ "t.test", #' # assume equal variance in the t-test #' test.args = all_tests("t.test") ~ list(var.equal = TRUE) #' ) -#' } -#' @section Example Output: -#' \if{html}{Example 1} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_p_ex1.png", width = "60")` -#' }} -#' -#' \if{html}{Example 2} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_p_ex2.png", width = "60")` -#' }} - -add_p.tbl_summary <- function(x, test = NULL, pvalue_fun = NULL, - group = NULL, include = everything(), - test.args = NULL, ...) { - check_dots_empty(error = function(e) inform(c(e$message, e$body))) +add_p.tbl_summary <- function(x, + test = NULL, + pvalue_fun = styfn_pvalue(digits = 1), + group = NULL, + include = everything(), + test.args = NULL, + adj.vars = NULL, + ...) { + set_cli_abort_call() + # check/process inputs ------------------------------------------------------- + check_dots_empty() updated_call_list <- c(x$call_list, list(add_p = match.call())) - # setting defaults from gtsummary theme -------------------------------------- - test <- test %||% get_theme_element("add_p.tbl_summary-arg:test") - pvalue_fun <- - pvalue_fun %||% - get_theme_element("add_p.tbl_summary-arg:pvalue_fun") %||% - get_theme_element("pkgwide-fn:pvalue_fun") %||% - .get_deprecated_option("gtsummary.pvalue_fun", default = style_pvalue) %>% - gts_mapper("add_p(pvalue_fun=)") - - # converting bare arguments to string ---------------------------------------- - group <- - .select_to_varnames( - select = {{ group }}, - data = x$inputs$data, - var_info = x$table_body, - arg_name = "group", - select_single = TRUE - ) - include <- - .select_to_varnames( - select = {{ include }}, - data = select(x$inputs$data, any_of(x$meta_data$variable)), - var_info = x$table_body, - arg_name = "include" - ) - # checking that input x has a by var - if (is.null(x$df_by)) { - paste( - "Cannot add a p-value when no 'by' variable", - "in original `tbl_summary(by=)` call." - ) %>% - stop(call. = FALSE) - } - if (any(c("add_difference", "add_p") %in% names(x$call_list)) && - "p.value" %in% names(x$table_body)) { - paste( - "`add_p()` cannot be run after `add_difference()` or `add_p()` when a", - "'p.value' column is already present." - ) %>% - stop(call. = FALSE) + if (is_empty(x$inputs$by)) { + "Cannot run {.fun add_p} when {.code tbl_summary(by)} argument not included." |> + cli::cli_abort(call = get_cli_abort_call()) } - # test ----------------------------------------------------------------------- - # parsing into a named list - test <- - .formula_list_to_named_list( - x = test, - data = select(x$inputs$data, any_of(include)), - var_info = x$table_body, - arg_name = "test", - type_check = chuck(type_check, "is_function_or_string", "fn"), - type_check_msg = chuck(type_check, "is_function_or_string", "msg") + cards::process_selectors( + select_prep(x$table_body, x$inputs$data[x$inputs$include]), + include = {{ include }} + ) + cards::process_selectors(x$inputs$data, group = {{ group }}, adj.vars = {{ adj.vars }}) + check_scalar(group, allow_empty = TRUE) + + cards::process_formula_selectors( + select_prep(x$table_body, x$inputs$data[include]), + test = test, + include_env = TRUE + ) + # add the calling env to the test + test <- .add_env_to_list_elements(test, env = caller_env()) + + cards::check_list_elements( + test, + predicate = \(x) is.character(x) || is.function(x), + error_msg = c("Error in the argument {.arg {arg_name}} for variable {.val {variable}}.", + i = "Value must be {.cls character} or {.cls function}." ) + ) - # checking pvalue_fun are functions - if (!is.function(pvalue_fun)) { - stop("Input 'pvalue_fun' must be a function.", call. = FALSE) + # if `pvalue_fun` not modified, check if we need to use a theme p-value + if (missing(pvalue_fun)) { + pvalue_fun <- + get_theme_element("add_p.tbl_summary-arg:pvalue_fun") %||% + get_theme_element("pkgwide-fn:pvalue_fun") %||% + pvalue_fun } + pvalue_fun <- as_function(pvalue_fun) - # caller_env for add_p - caller_env <- rlang::caller_env() - - # getting the test name and pvalue - meta_data <- - x$meta_data %>% - select("variable", "summary_type") %>% - filter(.data$variable %in% include) %>% - mutate( - test = - map2( - .data$variable, .data$summary_type, - function(variable, summary_type) { - .assign_test_tbl_summary( - data = x$inputs$data, variable = variable, summary_type = summary_type, - by = x$by, group = group, test = test - ) - } - ), - test_info = - map( - .data$test, - function(test) .get_add_p_test_fun("tbl_summary", test = test, env = caller_env) - ), - test_name = map_chr(.data$test_info, ~ pluck(.x, "test_name")) - ) - # adding test_name to table body so it can be used to select vars by the test - x$table_body <- - x$table_body %>% - select(-any_of(c("test_name", "test_result"))) %>% - left_join(meta_data[c("variable", "test_name")], by = "variable") %>% - select("variable", "test_name", everything()) - - # converting to named list - test.args <- - .formula_list_to_named_list( - x = test.args, - data = select(x$inputs$data, any_of(include)), - var_info = x$table_body, - arg_name = "test.args", - type_check = chuck(type_check, "is_named", "fn"), - type_check_msg = chuck(type_check, "is_named", "msg") + # select test ---------------------------------------------------------------- + test <- + assign_tests( + x = x, + test = test, + group = group, + adj.vars = adj.vars, + include = include, + calling_fun = "add_p" ) - x$meta_data <- - meta_data %>% - mutate( - test_result = pmap( - list(.data$test_info, .data$variable, .data$summary_type), - function(test_info, variable, summary_type) { - .run_add_p_test_fun( - x = test_info, data = .env$x$inputs$data, - by = .env$x$by, variable = variable, - group = group, type = summary_type, - test.args = test.args[[variable]], tbl = x - ) - } - ), - p.value = map_dbl(.data$test_result, ~ pluck(.x, "df_result", "p.value")), - stat_test_lbl = map_chr(.data$test_result, ~ pluck(.x, "df_result", "method")) - ) %>% - select("variable", "test_result", "p.value", "stat_test_lbl") %>% - { - left_join( - x$meta_data %>% select(-any_of(c("test_result", "p.value", "stat_test_lbl"))), - ., + # add all available test meta data to a data frame --------------------------- + df_test_meta_data <- + imap( + test, + ~ dplyr::tibble(variable = .y, fun_to_run = list(.x), test_name = attr(.x, "test_name") %||% NA_character_) + ) |> + dplyr::bind_rows() + + # add test names to `.$table_body` so it can be used in selectors ------------ + if (!"test_name" %in% names(x$table_body)) { + x$table_body <- + dplyr::left_join( + x$table_body, + df_test_meta_data[c("variable", "test_name")], by = "variable" - ) - } - - x$call_list <- updated_call_list - add_p_merge_p_values(x, meta_data = x$meta_data, pvalue_fun = pvalue_fun) -} - -# function to create text for footnote -footnote_add_p <- function(meta_data) { - if (!"test_result" %in% names(meta_data)) { - return(NA_character_) - } - - footnotes <- - meta_data$test_result %>% - map_chr(~ pluck(., "df_result", "method") %||% NA_character_) %>% - stats::na.omit() %>% - unique() - - if (length(footnotes) > 0) { - language <- get_theme_element("pkgwide-str:language", default = "en") - return(paste(map_chr(footnotes, ~ translate_text(.x, language)), collapse = "; ")) + ) |> + dplyr::relocate("test_name", .after = "variable") } else { - return(NA_character_) + x$table_body <- + dplyr::rows_update( + x$table_body, + df_test_meta_data[c("variable", "test_name")], + by = "variable", + unmatched = "ignore" + ) |> + dplyr::relocate("test_name", .after = "variable") } -} -# function to merge p-values to tbl -add_p_merge_p_values <- function(x, lgl_add_p = TRUE, - meta_data, pvalue_fun, - estimate_fun = NULL, - conf.level = 0.95, - adj.vars = NULL) { - x <- - # merging in p-value to table_body - modify_table_body( - x, - left_join, - meta_data %>% - select("variable", "test_result") %>% - mutate( - df_result = map(.data$test_result, ~ pluck(.x, "df_result")), - row_type = "label" - ) %>% - unnest("df_result") %>% - select( - -any_of("method"), - # removing any columns already present in table_body - -any_of(names(x$table_body) %>% setdiff(c("variable", "row_type"))) - ), - by = c("variable", "row_type") - ) %>% - # adding print instructions for p-value column - modify_table_styling( - columns = any_of("p.value"), - label = paste0("**", translate_text("p-value"), "**"), - hide = FALSE, - fmt_fun = pvalue_fun, - footnote = footnote_add_p(meta_data) - ) - # don't display difference and CI for add_p fns - if (lgl_add_p == FALSE) { - x <- x %>% - # adding print instructions for estimate - modify_table_styling( - columns = any_of("estimate"), - label = ifelse(is.null(adj.vars), - paste0("**", translate_text("Difference"), "**"), - paste0("**", translate_text("Adjusted Difference"), "**") - ), - hide = FALSE, - fmt_fun = switch(is_function(estimate_fun), - estimate_fun - ), - footnote = footnote_add_p(meta_data) - ) - - # add row formatting for difference and CI - if (is.list(estimate_fun)) { - x$table_styling$fmt_fun <- - x$table_styling$fmt_fun %>% - bind_rows( - estimate_fun %>% - tibble::enframe("variable", "fmt_fun") %>% - rowwise() %>% - mutate( - column = - c("estimate", "conf.low", "conf.high") %>% - intersect(names(x$table_body)) %>% - list(), - rows = glue(".data$variable == '{variable}'") %>% rlang::parse_expr() %>% list() - ) %>% - ungroup() %>% - select("column", "rows", "fmt_fun") %>% - unnest(cols = "column") - ) - } - - - # adding formatted CI column - if (all(c("conf.low", "conf.high") %in% names(x$table_body)) && - !"ci" %in% names(x$table_body)) { - ci.sep <- get_theme_element("pkgwide-str:ci.sep", default = ", ") - x <- x %>% - modify_table_body( - ~ .x %>% - mutate( - ci = pmap_chr( - list(variable, conf.low, conf.high), - ~ case_when( - !is.na(..2) | !is.na(..3) ~ - paste(do.call(estimate_fun[[..1]], list(..2)), - do.call(estimate_fun[[..1]], list(..3)), - sep = ci.sep - ) - ) - ) - ) - ) %>% - modify_table_body(dplyr::relocate, "ci", .before = "conf.low") %>% - # adding print instructions for estimates - modify_table_styling( - any_of("ci"), - label = paste0("**", conf.level * 100, "% ", translate_text("CI"), "**"), - hide = FALSE, - footnote = footnote_add_p(meta_data), - footnote_abbrev = translate_text("CI = Confidence Interval") - ) - } - } - - # fill in the Ns in the header table modify_stat_* columns - x <- .fill_table_header_modify_stats(x) - x -} - - -#' Adds p-value to crosstab table -#' -#' Calculate and add a p-value comparing the two variables in the cross table. -#' Missing values are included in p-value calculations. -#' -#' @param x Object with class `tbl_cross` from the [tbl_cross] function -#' @param pvalue_fun Function to round and format p-value. -#' Default is [style_pvalue], except when `source_note = TRUE` when the -#' default is `style_pvalue(x, prepend_p = TRUE)` -#' @param source_note Logical value indicating whether to show p-value -#' in the \{gt\} table source notes rather than a column. -#' @param test A string specifying statistical test to perform. Default is -#' "`chisq.test`" when expected cell counts >=5 and "`fisher.test`" when -#' expected cell counts <5. -#' @param test.args Named list containing additional arguments to pass to -#' the test (if it accepts additional arguments). -#' For example, add an argument for a chi-squared test with -#' `test.args = list(correct = TRUE)` -#' @inheritParams add_p.tbl_summary -#' @family tbl_cross tools -#' @author Karissa Whiting -#' @export -#' @examples -#' \donttest{ -#' # Example 1 ---------------------------------- -#' add_p_cross_ex1 <- -#' trial %>% -#' tbl_cross(row = stage, col = trt) %>% -#' add_p() -#' -#' # Example 2 ---------------------------------- -#' add_p_cross_ex2 <- -#' trial %>% -#' tbl_cross(row = stage, col = trt) %>% -#' add_p(source_note = TRUE) -#' } -#' @section Example Output: -#' \if{html}{Example 1} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_p_cross_ex1.png", width = "50")` -#' }} -#' -#' \if{html}{Example 2} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_p_cross_ex2.png", width = "45")` -#' }} -add_p.tbl_cross <- function(x, test = NULL, pvalue_fun = NULL, - source_note = NULL, - test.args = NULL, ...) { - check_dots_empty(error = function(e) inform(c(e$message, e$body))) - updated_call_list <- c(x$call_list, list(add_p = match.call())) - - # setting defaults ----------------------------------------------------------- - test <- test %||% get_theme_element("add_p.tbl_cross-arg:test") - source_note <- source_note %||% - get_theme_element("add_p.tbl_cross-arg:source_note", default = FALSE) - if (source_note == FALSE) { - pvalue_fun <- - pvalue_fun %||% - get_theme_element("add_p.tbl_cross-arg:pvalue_fun") %||% - get_theme_element("pkgwide-fn:pvalue_fun") %||% - .get_deprecated_option("gtsummary.pvalue_fun", default = style_pvalue) %>% - gts_mapper("add_p(pvalue_fun=)") - } else { - pvalue_fun <- - pvalue_fun %||% - get_theme_element("pkgwide-fn:prependpvalue_fun") %||% - (function(x) style_pvalue(x, prepend_p = TRUE)) %>% - gts_mapper("add_p(pvalue_fun=)") - } - - # adding test name if supplied (NULL otherwise) - input_test <- switch(!is.null(test), - rlang::expr(everything() ~ !!test) + # now process the `test.args` argument --------------------------------------- + cards::process_formula_selectors( + select_prep(x$table_body, x$inputs$data[include]), + test.args = test.args ) - input_test.args <- switch(!is.null(test.args), - rlang::expr(everything() ~ !!test.args) + cards::check_list_elements( + test.args, + predicate = \(x) is.list(x) && is_named(x), + error_msg = c("Error in the argument {.arg {arg_name}} for variable {.val {variable}}.", + i = "Value must be a named list." + ) ) - # running add_p to add the p-value to the output - x_copy <- x - # passing the data frame after missing values have been transformed to factor/observed levels - x$inputs$data <- x$tbl_data + # calculate tests ------------------------------------------------------------ x <- - expr( - add_p.tbl_summary(x, - test = !!input_test, - test.args = !!input_test.args, - pvalue_fun = !!pvalue_fun, - include = -any_of("..total..") - ) - ) %>% - eval() - # replacing the input data set with the original from the `tbl_cross()` call - x$inputs$data <- x_copy$inputs$data - - if (source_note == TRUE) { - test_name <- - x$meta_data$stat_test_lbl %>% - discard(is.na) %>% - translate_text() - # report p-value as a source_note - # hiding p-value from output - x <- - modify_table_styling( - x, - columns = "p.value", - footnote = NA_character_, - hide = TRUE - ) - - x$table_styling$source_note <- - paste(test_name, pvalue_fun(discard(x$meta_data$p.value, is.na)), sep = ", ") - attr(x$table_styling$source_note, "text_interpret") <- "md" - } - - # strip markdown bold around column label ------------------------------------ - x$table_styling$header <- - x$table_styling$header %>% - mutate( - label = - ifelse( - .data$column %in% "p.value", - stringr::str_replace_all(.data$label, - pattern = "\\*\\*(.*?)\\*\\*", - replacement = "\\1" - ), - .data$label - ) + calculate_and_add_test_results( + x = x, include = include, group = group, test.args = test.args, adj.vars = adj.vars, + df_test_meta_data = df_test_meta_data, pvalue_fun = pvalue_fun, calling_fun = "add_p" ) - # return tbl_cross ----------------------------------------------------------- + # update call list x$call_list <- updated_call_list + x } +calculate_and_add_test_results <- function(x, include, group, test.args, adj.vars = NULL, + df_test_meta_data, pvalue_fun = NULL, + estimate_fun = NULL, conf.level = 0.95, + calling_fun) { + # list of ARDs or broom::tidy-like results + lst_results <- + lapply( + include, + \(variable) { + # evaluate the test + lst_captured_results <- + cards::eval_capture_conditions( + do.call( + what = + df_test_meta_data |> + dplyr::filter(.data$variable %in% .env$variable) |> + dplyr::pull("fun_to_run") %>% + getElement(1), + args = list( + data = x$inputs$data, + variable = variable, + by = x$inputs$by, + group = group, + type = x$inputs$type[[variable]], + test.args = test.args[[variable]], + adj.vars = adj.vars, + conf.level = conf.level + ) + ) + ) -#' Adds p-value to survfit table -#' -#' \lifecycle{maturing} -#' Calculate and add a p-value -#' @param x Object of class `"tbl_survfit"` -#' @param test string indicating test to use. Must be one of `"logrank"`, `"tarone"`, `"survdiff"`, -#' `"petopeto_gehanwilcoxon"`, `"coxph_lrt"`, `"coxph_wald"`, `"coxph_score".` -#' See details below -#' @inheritParams add_p.tbl_summary -#' @inheritParams combine_terms -#' @family tbl_survfit tools -#' -#' @section test argument: -#' The most common way to specify `test=` is by using a single string indicating -#' the test name. However, if you need to specify different tests within the same -#' table, the input in flexible using the list notation common throughout the -#' gtsummary package. For example, the following code would call the log-rank test, -#' and a second test of the *G-rho* family. -#' ```r -#' ... %>% -#' add_p(test = list(trt ~ "logrank", grade ~ "survdiff"), -#' test.args = grade ~ list(rho = 0.5)) -#' ``` -#' -#' @export -#' @examplesIf broom.helpers::.assert_package("survival", pkg_search = "gtsummary", boolean = TRUE) -#' \donttest{ -#' library(survival) -#' -#' gts_survfit <- -#' list( -#' survfit(Surv(ttdeath, death) ~ grade, trial), -#' survfit(Surv(ttdeath, death) ~ trt, trial) -#' ) %>% -#' tbl_survfit(times = c(12, 24)) -#' -#' # Example 1 ---------------------------------- -#' add_p_tbl_survfit_ex1 <- -#' gts_survfit %>% -#' add_p() -#' -#' # Example 2 ---------------------------------- -#' # Pass `rho=` argument to `survdiff()` -#' add_p_tbl_survfit_ex2 <- -#' gts_survfit %>% -#' add_p(test = "survdiff", test.args = list(rho = 0.5)) -#' } -#' @section Example Output: -#' \if{html}{Example 1} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_p_tbl_survfit_ex1.png", width = "55")` -#' }} -#' -#' \if{html}{Example 2} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_p_tbl_survfit_ex2.png", width = "45")` -#' }} - -add_p.tbl_survfit <- function(x, test = "logrank", test.args = NULL, - pvalue_fun = style_pvalue, - include = everything(), quiet = NULL, ...) { - check_dots_empty(error = function(e) inform(c(e$message, e$body))) - updated_call_list <- c(x$call_list, list(add_p = match.call())) - # setting defaults ----------------------------------------------------------- - quiet <- quiet %||% get_theme_element("pkgwide-lgl:quiet") %||% FALSE - - # checking inputs ------------------------------------------------------------ - pvalue_fun <- - pvalue_fun %||% - get_theme_element("pkgwide-fn:pvalue_fun") %||% - .get_deprecated_option("gtsummary.pvalue_fun", default = style_pvalue) %>% - gts_mapper("add_p(pvalue_fun=)") - - include <- .select_to_varnames(select = {{ include }}, var_info = x$meta_data) - - # if user passed a string of the test name, convert it to a tidy select list - if (rlang::is_string(test)) { - test <- expr(everything() ~ !!test) %>% eval() - if (!is.null(test.args)) { - test.args <- expr(everything() ~ !!test.args) %>% eval() - } - } - - # converting test and test.args to named list -------------------------------- - test <- - .formula_list_to_named_list( - x = test, - var_info = x$table_body, - arg_name = "test", - type_check = chuck(type_check, "is_function_or_string", "fn"), - type_check_msg = chuck(type_check, "is_function_or_string", "msg") - ) - - # adding pvalue to meta data ------------------------------------------------- - # caller_env for add_p - caller_env <- rlang::caller_env() - - # getting the test name and p-value - meta_data <- - x$meta_data %>% - filter(.data$stratified == TRUE & .data$variable %in% include) %>% - select("variable", "survfit") %>% - mutate( - test = map(.data$variable, ~ test[[.x]] %||% "logrank"), - test_info = map( - .data$test, - function(test) .get_add_p_test_fun("tbl_survfit", test = test, env = caller_env) - ), - test_name = map_chr(.data$test_info, ~ pluck(.x, "test_name")) - ) - # adding test_name to table body so it can be used to select vars by the test - x$table_body <- - left_join(x$table_body, meta_data[c("variable", "test_name")], by = "variable") %>% - select("variable", "test_name", everything()) - - # converting to named list - test.args <- - .formula_list_to_named_list( - x = test.args, - var_info = x$table_body, - arg_name = "test.args", - type_check = chuck(type_check, "is_named", "fn"), - type_check_msg = chuck(type_check, "is_named", "msg") - ) - - # checking the formula and data from survfit object are available - purrr::walk( - x$meta_data$survfit, - function(suvfit) { - # extracting survfit call - survfit_call <- suvfit$call %>% as.list() - # index of formula and data - call_index <- names(survfit_call) %in% c("formula", "data") %>% which() - # converting call into a model.frame call - rlang::call2(rlang::expr(stats::model.frame), !!!survfit_call[call_index]) %>% - safe_survfit_eval() - } - ) + # if there was a warning captured, print it now + if (!is.null(lst_captured_results[["warning"]])) { + cli::cli_inform(c( + "The following warning was returned in {.fun {calling_fun}} for variable {.val {variable}}", + "!" = lst_captured_results[["warning"]] + )) + } - x$meta_data <- - meta_data %>% - mutate( - test_result = pmap( - list(.data$test_info, .data$variable, .data$survfit), - function(test_info, variable, survfit) { - .run_add_p_test_fun( - x = test_info, data = survfit, - variable = variable, - test.args = test.args[[variable]] - ) + # if test evaluated without error, return the result + if (!is.null(lst_captured_results[["result"]])) return(lst_captured_results[["result"]]) # styler: off + # otherwise, construct a {cards}-like object with error + dplyr::tibble( + group1 = x$inputs$by, + variable = variable, + stat_name = switch(calling_fun, + "add_p" = "p.value", + "add_difference" = "estimate" + ), + stat = list(NULL), + warning = lst_captured_results["warning"], + error = lst_captured_results["error"] + ) %>% + structure(., class = c("card", class(.))) + } + ) |> + stats::setNames(include) + + # print any errors or warnings + lst_results |> + map( + \(x) { + if (inherits(x, "card")) { + x |> + dplyr::mutate( + across(c(cards::all_ard_groups("levels"), cards::all_ard_variables("levels")), ~ unlist(.) |> as.character()) + ) + } else { + NULL } - ), - p.value = map_dbl(.data$test_result, ~ pluck(.x, "df_result", "p.value")), - stat_test_lbl = map_chr(.data$test_result, ~ pluck(.x, "df_result", "method")) - ) %>% - select("variable", "test_result", "p.value", "stat_test_lbl") %>% + } + ) |> + dplyr::bind_rows() %>% { - left_join(x$meta_data, ., by = "variable") + switch(!is_empty(.), + dplyr::filter(., .data$stat_name %in% c( + "estimate", "std.error", "parameter", "statistic", + "conf.low", "conf.high", "p.value" + )) |> + cards::print_ard_conditions() + ) } - x$call_list <- updated_call_list - add_p_merge_p_values(x, meta_data = x$meta_data, pvalue_fun = pvalue_fun) -} - - -#' Adds p-values to svysummary tables -#' -#' Adds p-values to tables created by `tbl_svysummary` by comparing values across groups. -#' -#' @param x Object with class `tbl_svysummary` from the [tbl_svysummary] function -#' @param test List of formulas specifying statistical tests to perform, -#' e.g. \code{list(all_continuous() ~ "svy.t.test", all_categorical() ~ "svy.wald.test")}. -#' Options include -#' * `"svy.t.test"` for a t-test adapted to complex survey samples (cf. `survey::svyttest`), -#' * `"svy.wilcox.test"` for a Wilcoxon rank-sum test for complex survey samples (cf. `survey::svyranktest`), -#' * `"svy.kruskal.test"` for a Kruskal-Wallis rank-sum test for complex survey samples (cf. `survey::svyranktest`), -#' * `"svy.vanderwaerden.test"` for a van der Waerden's normal-scores test for complex survey samples (cf. `survey::svyranktest`), -#' * `"svy.median.test"` for a Mood's test for the median for complex survey samples (cf. `survey::svyranktest`), -#' * `"svy.chisq.test"` for a Chi-squared test with Rao & Scott's second-order correction (cf. `survey::svychisq`), -#' * `"svy.adj.chisq.test"` for a Chi-squared test adjusted by a design effect estimate (cf. `survey::svychisq`), -#' * `"svy.wald.test"` for a Wald test of independence for complex survey samples (cf. `survey::svychisq`), -#' * `"svy.adj.wald.test"` for an adjusted Wald test of independence for complex survey samples (cf. `survey::svychisq`), -#' * `"svy.lincom.test"` for a test of independence using the exact asymptotic distribution for complex survey samples (cf. `survey::svychisq`), -#' * `"svy.saddlepoint.test"` for a test of independence using a saddlepoint approximation for complex survey samples (cf. `survey::svychisq`), -#' -#' Tests default to `"svy.wilcox.test"` for continuous variables and `"svy.chisq.test"` -#' for categorical variables. -#' @param ... Not used -#' @inheritParams add_p.tbl_summary -#' @inheritParams tbl_regression -#' @inheritParams tbl_svysummary -#' @family tbl_svysummary tools -#' @export -#' @return A `tbl_svysummary` object -#' @author Joseph Larmarange -#' @examplesIf broom.helpers::.assert_package("survey", pkg_search = "gtsummary", boolean = TRUE) -#' \donttest{ -#' # Example 1 ---------------------------------- -#' # A simple weighted dataset -#' add_p_svysummary_ex1 <- -#' survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) %>% -#' tbl_svysummary(by = Survived, include = c(Sex, Age)) %>% -#' add_p() -#' -#' # A dataset with a complex design -#' data(api, package = "survey") -#' d_clust <- survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc) -#' -#' # Example 2 ---------------------------------- -#' add_p_svysummary_ex2 <- -#' tbl_svysummary(d_clust, by = both, include = c(api00, api99, both)) %>% -#' add_p() -#' -#' # Example 3 ---------------------------------- -#' # change tests to svy t-test and Wald test -#' add_p_svysummary_ex3 <- -#' tbl_svysummary(d_clust, by = both, include = c(cname, api00, api99, both)) %>% -#' add_p( -#' test = list( -#' all_continuous() ~ "svy.t.test", -#' all_categorical() ~ "svy.wald.test" -#' ) -#' ) -#' } -#' @section Example Output: -#' \if{html}{Example 1} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_p_svysummary_ex1.png", width = "45")` -#' }} -#' -#' \if{html}{Example 2} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_p_svysummary_ex2.png", width = "65")` -#' }} -#' -#' \if{html}{Example 3} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_p_svysummary_ex3.png", width = "60")` -#' }} - -add_p.tbl_svysummary <- function(x, test = NULL, pvalue_fun = NULL, - include = everything(), test.args = NULL, ...) { - check_dots_empty(error = function(e) inform(c(e$message, e$body))) - updated_call_list <- c(x$call_list, list(add_p = match.call())) - - # checking for survey package ------------------------------------------------ - assert_package("survey", "add_p.tbl_svysummary()") - - # setting defaults from gtsummary theme -------------------------------------- - test <- test %||% - get_theme_element("add_p.tbl_svysummary-arg:test") %||% - get_theme_element("add_p.tbl_summary-arg:test") - pvalue_fun <- - pvalue_fun %||% - get_theme_element("add_p.tbl_svysummary-arg:pvalue_fun") %||% - get_theme_element("add_p.tbl_summary-arg:pvalue_fun") %||% - get_theme_element("pkgwide-fn:pvalue_fun") %||% - .get_deprecated_option("gtsummary.pvalue_fun", default = style_pvalue) %>% - gts_mapper("add_p(pvalue_fun=)") - - - # converting bare arguments to string ---------------------------------------- - include <- - .select_to_varnames( - select = {{ include }}, - data = select(x$inputs$data$variables, any_of(x$meta_data$variable)), - var_info = x$table_body, - arg_name = "include" - ) - - # checking that input x has a by var - if (is.null(x$df_by)) { - stop(paste0( - "Cannot add comparison when no 'by' variable ", - "in original `tbl_svysummary()` call" - ), call. = FALSE) - } - - # test ----------------------------------------------------------------------- - # parsing into a named list - test <- - .formula_list_to_named_list( - x = test, - data = select(x$inputs$data$variables, any_of(x$meta_data$variable)), - var_info = x$table_body, - arg_name = "test", - type_check = chuck(type_check, "is_function_or_string", "fn"), - type_check_msg = chuck(type_check, "is_function_or_string", "msg") - ) - - # checking pvalue_fun are functions - if (!is.function(pvalue_fun)) { - stop("Input 'pvalue_fun' must be a function.", call. = FALSE) - } - # caller_env for add_p - caller_env <- rlang::caller_env() + # combine results into a single data frame + df_results <- + lst_results |> + imap( + function(x, variable) { + # if results are an ARD, reshape into broom::tidy-like format + if (inherits(x, "card")) { + res <- + dplyr::filter(x, .data$stat_name %in% c( + "estimate", "std.error", "parameter", "statistic", + "conf.low", "conf.high", "p.value" + )) |> + cards::replace_null_statistic() |> + tidyr::pivot_wider( + id_cols = "variable", + names_from = "stat_name", + values_from = "stat" + ) |> + dplyr::mutate(across(-"variable", unlist)) + } else { + if (!is.data.frame(x)) { + cli::cli_abort( + c("Expecting the test result object for variable {.val {variable}} to be a {.cls {c('data.frame', 'tibble')}}.", + i = "Review {.help gtsummary::tests} for details on constructing a custom function." + ), + call = get_cli_abort_call() + ) + } - # getting the test name and pvalue - meta_data <- - x$meta_data %>% - select("variable", "summary_type") %>% - filter(.data$variable %in% include) %>% - mutate( - test = map2( - .data$variable, .data$summary_type, - function(variable, summary_type) { - .assign_test_tbl_svysummary( - data = x$inputs$data, variable = variable, summary_type = summary_type, - by = x$by, test = test - ) + res <- + dplyr::select(x, any_of(c( + "estimate", "std.error", "parameter", "statistic", + "conf.low", "conf.high", "p.value" + ))) |> + dplyr::mutate(variable = .env$variable, .before = 1L) + # check the result structure + if (identical(names(res), "variable") || nrow(res) != 1L) { + cli::cli_abort( + c("The test result object for variable {.val {variable}} is not the expected structure.", + i = "Review {.help gtsummary::tests} for details on constructing a custom function." + ), + call = get_cli_abort_call() + ) + } } - ), - test_info = map( - .data$test, - function(test) .get_add_p_test_fun("tbl_svysummary", test = test, env = caller_env) - ), - test_name = map_chr(.data$test_info, ~ pluck(.x, "test_name")) - ) - # adding test_name to table body so it can be used to select vars by the test - x$table_body <- - left_join(x$table_body, meta_data[c("variable", "test_name")], by = "variable") %>% - select("variable", "test_name", everything()) - - # converting to named list - test.args <- - .formula_list_to_named_list( - x = test.args, - data = select(x$inputs$data, any_of(include)), - var_info = x$table_body, - arg_name = "test.args", - type_check = chuck(type_check, "is_named", "fn"), - type_check_msg = chuck(type_check, "is_named", "msg") + res + } + ) |> + dplyr::bind_rows() |> + dplyr::select( + any_of(c( + "variable", "estimate", "std.error", "parameter", "statistic", + "conf.low", "conf.high", "p.value" + )) ) - x$meta_data <- - meta_data %>% - mutate( - test_result = pmap( - list(.data$test_info, .data$variable, .data$summary_type), - function(test_info, variable, summary_type) { - .run_add_p_test_fun( - x = test_info, data = .env$x$inputs$data, - by = .env$x$inputs$by, variable = variable, - type = summary_type, - test.args = test.args[[variable]] - ) - } + # remove new columns that already exist in gtsummary table + new_columns <- names(df_results) |> setdiff(names(x$table_body)) + if (is_empty(new_columns)) { + cli::cli_abort( + c("Columns {.val {names(df_results) |> setdiff('variable')}} are already present in table (although, some may be hidden), and no new columns were added.", + i = "Use {.code tbl |> modify_table_body(\\(x) dplyr::select(x, -p.value))} to remove columns and they will be replaced by the new columns from the current call." ), - p.value = map_dbl(.data$test_result, ~ pluck(.x, "df_result", "p.value")), - stat_test_lbl = map_chr(.data$test_result, ~ pluck(.x, "df_result", "method")) - ) %>% - select("variable", "test_result", "p.value", "stat_test_lbl") %>% - { - left_join(x$meta_data, ., by = "variable") - } - - x$call_list <- updated_call_list - add_p_merge_p_values(x, meta_data = x$meta_data, pvalue_fun = pvalue_fun) -} - -#' P-values for `tbl_continuous` -#' -#' @inheritParams add_p.tbl_summary -#' @param test List of formulas specifying statistical tests to perform for each -#' variable. -#' Default is two-way ANOVA when `by=` is not `NULL`, and has the same defaults -#' as `add_p.tbl_continuous()` when `by = NULL`. -#' See [tests] for details, more tests, and instruction for implementing a custom test. -#' @export -#' @family tbl_continuous tools -#' @examples -#' add_p_continuous_ex1 <- -#' tbl_continuous( -#' data = trial, -#' variable = age, -#' by = trt, -#' include = grade -#' ) %>% -#' add_p() -add_p.tbl_continuous <- function(x, test = NULL, pvalue_fun = NULL, - include = everything(), test.args = NULL, - group = NULL, ...) { - check_dots_empty(error = function(e) inform(c(e$message, e$body))) - updated_call_list <- c(x$call_list, list(add_p = match.call())) - # setting defaults from gtsummary theme -------------------------------------- - pvalue_fun <- - pvalue_fun %||% - get_theme_element("pkgwide-fn:pvalue_fun", default = style_pvalue) %>% - gts_mapper("add_p(pvalue_fun=)") - - # converting bare arguments to string ---------------------------------------- - include <- - .select_to_varnames( - select = {{ include }}, - data = select(x$inputs$data, any_of(x$meta_data$variable)), - var_info = x$table_body, - arg_name = "include" + call = get_cli_abort_call() ) + } - group <- - .select_to_varnames( - select = {{ group }}, - data = x$inputs$data, - arg_name = "group" + # create default footnote text + footnote <- map( + lst_results, + function(x) { + if (inherits(x, "card")) { + ft <- x |> + dplyr::filter(.data$stat_name %in% "method") |> + dplyr::pull("stat") |> + unlist() + } else { + ft <- x[["method"]] + } + ft + } + ) |> + unlist() |> + unique() |> + paste(collapse = "; ") + if (footnote == "" || is_empty(footnote)) footnote <- NULL # styler: off + + # add results to `.$table_body` ---------------------------------------------- + x <- x |> + modify_table_body( + ~ dplyr::left_join( + .x, + df_results[c("variable", new_columns)] |> dplyr::mutate(row_type = "label"), + by = c("variable", "row_type") + ) ) - test <- - .formula_list_to_named_list( - x = test, - data = select(x$inputs$data, any_of(x$meta_data$variable)), - var_info = x$table_body, - arg_name = "test", - type_check = chuck(type_check, "is_function_or_string", "fn"), - type_check_msg = chuck(type_check, "is_function_or_string", "msg") + x <- + modify_table_styling( + x, + columns = any_of(intersect("p.value", new_columns)), + label = "**p-value**", + hide = FALSE, + fmt_fun = pvalue_fun %||% styfn_pvalue(), + footnote = footnote + ) |> + modify_table_styling( + columns = + intersect("estimate", new_columns), + hide = calling_fun %in% "add_p", + label = ifelse(is_empty(adj.vars), "**Difference**", "**Adjusted Difference**"), + footnote = footnote + ) |> + modify_table_styling( + columns = + intersect("std.error", new_columns), + hide = TRUE, + label = "**Standard Error**", + footnote = footnote + ) |> + modify_table_styling( + columns = + intersect("parameter", new_columns), + hide = TRUE, + label = "**Parameter**", + fmt_fun = styfn_sigfig(), + footnote = footnote + ) |> + modify_table_styling( + columns = + intersect("statistic", new_columns), + hide = TRUE, + label = "**Statistic**", + fmt_fun = styfn_sigfig(), + footnote = footnote + ) |> + modify_table_styling( + columns = + intersect("conf.low", new_columns), + hide = calling_fun %in% "add_p", + label = glue("**{conf.level * 100}% CI**"), + footnote = footnote, + footnote_abbrev = "CI = Confidence Interval" ) - # checking pvalue_fun are functions - if (!is.function(pvalue_fun)) { - stop("Input 'pvalue_fun' must be a function.", call. = FALSE) + if (calling_fun %in% "add_difference" && all(c("conf.low", "conf.high") %in% new_columns)) { + x <- + modify_column_merge( + x, + pattern = "{conf.low}, {conf.high}", + rows = !is.na(.data$conf.low) + ) } - # caller_env for add_p - caller_env <- rlang::caller_env() - - # getting the test name and pvalue - meta_data <- - x$meta_data %>% - select("variable", "summary_type") %>% - filter(.data$variable %in% .env$include) %>% - mutate( - test = - map( - .data$variable, - function(variable) { - .assign_test_tbl_continuous( - data = x$inputs$data, continuous_variable = x$inputs$variable, - variable = variable, - by = x$inputs$by, group = group, test = test - ) - } - ), - test_info = - map( - .data$test, - function(test) .get_add_p_test_fun("tbl_continuous", test = test, env = caller_env) - ), - test_name = map_chr(.data$test_info, ~ pluck(.x, "test_name")) - ) - - # adding test_name to table body so it can be used to select vars by the test - x$table_body <- - x$table_body %>% - select(-any_of(c("test_name", "test_result"))) %>% - left_join(meta_data[c("variable", "test_name")], by = "variable") %>% - select("variable", "test_name", everything()) + # add the specified formatting functions + for (i in seq_along(estimate_fun)) { + x <- + rlang::inject( + modify_table_styling( + x, + columns = any_of(c("estimate", "conf.low", "conf.high")), + rows = .data$variable %in% !!names(estimate_fun[i]), + fmt_fun = !!(estimate_fun[[i]] %||% styfn_sigfig()) + ) + ) + } - # converting to named list - test.args <- - .formula_list_to_named_list( - x = test.args, - data = select(x$inputs$data, any_of(include)), - var_info = x$table_body, - arg_name = "test.args", - type_check = chuck(type_check, "is_named", "fn"), - type_check_msg = chuck(type_check, "is_named", "msg") - ) + # extending modify_stat_N to new columns + x$table_styling$header <- x$table_styling$header |> + tidyr::fill("modify_stat_N", .direction = "downup") - x$meta_data <- - meta_data %>% - mutate( - test_result = pmap( - list(.data$test_info, .data$variable, .data$summary_type), - function(test_info, variable, summary_type) { - .run_add_p_test_fun( - x = test_info, data = .env$x$inputs$data, - by = .env$x$inputs$by, variable = variable, - group = group, type = summary_type, - test.args = test.args[[variable]], tbl = x, - continuous_variable = x$inputs$variable - ) - } - ), - p.value = map_dbl(.data$test_result, ~ pluck(.x, "df_result", "p.value")), - stat_test_lbl = map_chr(.data$test_result, ~ pluck(.x, "df_result", "method")) - ) %>% - select("variable", "test_result", "p.value", "stat_test_lbl") %>% - { - left_join( - x$meta_data %>% select(-any_of(c("test_result", "p.value", "stat_test_lbl"))), - ., - by = "variable" - ) - } + # add raw results to `.$card` + x$cards[[calling_fun]] <- lst_results - x$call_list <- updated_call_list - add_p_merge_p_values(x, meta_data = x$meta_data, pvalue_fun = pvalue_fun) + x } diff --git a/R/add_q.R b/R/add_q.R index 4648634cd..255dc7540 100644 --- a/R/add_q.R +++ b/R/add_q.R @@ -1,67 +1,60 @@ -#' Add a column of q-values to account for -#' multiple comparisons +#' Add multiple comparison adjustment #' -#' Adjustments to p-values are performed with [stats::p.adjust]. +#' Adjustments to p-values are performed with [`stats::p.adjust()`]. #' -#' @param x a `gtsummary` object -#' @param method String indicating method to be used for p-value -#' adjustment. Methods from -#' [stats::p.adjust] are accepted. Default is `method = "fdr"`. -#' @inheritParams tbl_regression -#' @inheritParams add_global_p -#' @author Esther Drill, Daniel D. Sjoberg -#' @family tbl_summary tools -#' @family tbl_svysummary tools -#' @family tbl_regression tools -#' @family tbl_uvregression tools +#' @param x (`gtsummary`)\cr +#' a `gtsummary` object with a column named `"p.value"` +#' @param method (`string`)\cr +#' String indicating method to be used for p-value adjustment. Methods from +#' [`stats::p.adjust()`] are accepted. Default is `method='fdr'`. +#' Must be one of `r shQuote(stats::p.adjust.methods, "sh")` +#' @param pvalue_fun (`function`)\cr +#' Function to round and format q-values. Default is the function specified +#' to round the existing `'p.value'` column. +#' @param quiet DEPRECATED +#' +#' @author Daniel D. Sjoberg, Esther Drill #' @export -#' @examplesIf broom.helpers::.assert_package("car", pkg_search = "gtsummary", boolean = TRUE) -#' \donttest{ +#' @examplesIf gtsummary:::is_pkg_installed("cardx", reference_pkg = "gtsummary") && gtsummary:::is_pkg_installed("broom", reference_pkg = "cardx") #' # Example 1 ---------------------------------- #' add_q_ex1 <- -#' trial[c("trt", "age", "grade", "response")] %>% -#' tbl_summary(by = trt) %>% -#' add_p() %>% -#' add_q() -#' -#' # Example 2 ---------------------------------- -#' add_q_ex2 <- -#' trial[c("trt", "age", "grade", "response")] %>% -#' tbl_uvregression( -#' y = response, -#' method = glm, -#' method.args = list(family = binomial), -#' exponentiate = TRUE -#' ) %>% -#' add_global_p() %>% +#' trial |> +#' tbl_summary(by = trt, include = c(trt, age, grade, response)) |> +#' add_p() |> #' add_q() -#' } -#' @section Example Output: -#' \if{html}{Example 1} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_q_ex1.png", width = "65")` -#' }} #' -#' \if{html}{Example 2} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_q_ex2.png", width = "60")` -#' }} - +#' # TODO: Re-add this example after `add_global_p()` migrated +#' # # Example 2 ---------------------------------- +#' # add_q_ex2 <- +#' # trial[c("trt", "age", "grade", "response")] |> +#' # tbl_uvregression( +#' # y = response, +#' # method = glm, +#' # method.args = list(family = binomial), +#' # exponentiate = TRUE +#' # ) |> +#' # add_global_p() |> +#' # add_q() add_q <- function(x, method = "fdr", pvalue_fun = NULL, quiet = NULL) { + set_cli_abort_call() updated_call_list <- c(x$call_list, list(add_q = match.call())) - # setting defaults ----------------------------------------------------------- - quiet <- quiet %||% get_theme_element("pkgwide-lgl:quiet") %||% FALSE - # checking inputs ------------------------------------------------------------ - # checking class of x - .assert_class(x, "gtsummary") + # deprecation ---------------------------------------------------------------- + if (!is_empty(quiet)) { + lifecycle::deprecate_warn( + when = "2.0.0", + what = "gtsummary::add_q(quiet)" + ) + } + # check inputs --------------------------------------------------------------- + check_class(x, "gtsummary") + method <- arg_match(method, values = stats::p.adjust.methods, multiple = TRUE) # checking input table has a p.value column if (!"p.value" %in% names(x$table_body)) { - stop("There is no p-value column. `x$table_body` must have a column called 'p.value'", - call. = FALSE + cli::cli_abort( + "There is no p-value column. `x$table_body` must have a column called {.val p.value}.", + call = get_cli_abort_call() ) } @@ -72,43 +65,27 @@ add_q <- function(x, method = "fdr", pvalue_fun = NULL, quiet = NULL) { get_theme_element("add_q-arg:pvalue_fun") %||% get_theme_element("pkgwide-fn:pvalue_fun") %||% # default from p-value formatting function - (filter(x$table_styling$fmt_fun, .data$column == "p.value") %>% pull("fmt_fun") %>% pluck(1)) %>% - gts_mapper("add_q(pvalue_fun=)") + (dplyr::filter(x$table_styling$fmt_fun, .data$column == "p.value") |> dplyr::pull("fmt_fun") |> rev() |> getElement(1)) |> + as_function(arg = "pvalue_fun") - # checking pvalue_fun are functions - if (!is.function(pvalue_fun)) { - stop("Input 'pvalue_fun' must be a function.") - } - - # perform multiple comparisons ----------------------------------------------- - expr_p.adjust <- - rlang::expr(stats::p.adjust(x$table_body$p.value, method = !!method)) %>% - deparse() - if (quiet == FALSE) { - rlang::inform(glue("add_q: Adjusting p-values with\n`{expr_p.adjust}`")) - } - - x$table_body$q.value <- x$table_body$p.value %>% stats::p.adjust(method = method) + # calculate the adjusted p-value --------------------------------------------- + # TODO: add error handling here + q.value <- stats::p.adjust(x$table_body$p.value, method = method) - # update table_styling ------------------------------------------------------- - # footnote text - footnote_text <- - add_q_method_lookup[add_q_method_lookup$method == method, ]$method_label %>% - translate_text() + # update gtsummary table ----------------------------------------------------- x <- + modify_table_body(x, ~ dplyr::mutate(.x, q.value = q.value)) |> modify_table_styling( - x, columns = "q.value", - footnote = footnote_text, + label = paste0("**", translate_text("q-value"), "**"), + hide = FALSE, + footnote = .add_q_method_label(method), fmt_fun = pvalue_fun ) - # adding column header - x <- modify_header(x, q.value = paste0("**", translate_text("q-value"), "**")) - # return final object -------------------------------------------------------- # fill in the Ns in the header table modify_stat_* columns - x <- .fill_table_header_modify_stats(x) + x$table_styling$header <- tidyr::fill(x$table_styling$header, "modify_stat_N", .direction = "updown") # adding call x$call_list <- updated_call_list @@ -117,22 +94,18 @@ add_q <- function(x, method = "fdr", pvalue_fun = NULL, quiet = NULL) { # match method input to display name -add_q_method_lookup <- - tibble::tibble( - method = stats::p.adjust.methods - ) %>% - left_join( - tibble::tribble( - ~method, ~method_label, - "holm", "Holm correction for multiple testing", - "hochberg", "Hochberg correction for multiple testing", - "hommel", "Hommel correction for multiple testing", - "bonferroni", "Bonferroni correction for multiple testing", - "BH", "Benjamini & Hochberg correction for multiple testing", - "BY", "Benjamini & Yekutieli correction for multiple testing", - "fdr", "False discovery rate correction for multiple testing", - "none", "No correction for multiple testing" - ), - by = "method" - ) %>% - mutate(method_label = coalesce(method_label, method)) +.add_q_method_label <- function(method) { + lst_method_labels <- + list( + "holm" = "Holm correction for multiple testing", + "hochberg" = "Hochberg correction for multiple testing", + "hommel" = "Hommel correction for multiple testing", + "bonferroni" = "Bonferroni correction for multiple testing", + "BH" = "Benjamini & Hochberg correction for multiple testing", + "BY" = "Benjamini & Yekutieli correction for multiple testing", + "fdr" = "False discovery rate correction for multiple testing", + "none" = "No correction for multiple testing" + ) + + lst_method_labels[[method]] +} diff --git a/R/add_stat.R b/R/add_stat.R index b29a4ec0f..5cb9db137 100644 --- a/R/add_stat.R +++ b/R/add_stat.R @@ -1,39 +1,42 @@ -#' Add a custom statistic column +#' Add a custom statistic #' -#' \lifecycle{maturing} #' The function allows a user to add a new column (or columns) of statistics to an #' existing `tbl_summary`, `tbl_svysummary`, or `tbl_continuous` object. #' -#' @param x `tbl_summary`, `tbl_svysummary`, or `tbl_continuous` object -#' @param fns list of formulas indicating the functions that create the statistic. -#' See details below. -#' @param location list of formulas indicating the location the new statistics -#' are placed. The RHS of the formula must be one of `c("label", "level", "missing")`. -#' When `"label"`, a single statistic -#' is placed on the variable label row. When `"level"` the statistics are placed -#' on the variable level rows. The length of the vector of statistics returned from the -#' `fns` function must match the dimension of levels. Default is to place the -#' new statistics on the label row. -#' @param ... DEPRECATED +#' @param x (`tbl_summary`/`tbl_svysummary`/`tbl_continuous`)\cr +#' A gtsummary table of class `'tbl_summary'`, `'tbl_svysummary'`, or `'tbl_continuous'`. +#' @param fns ([`formula-list-selector`][syntax])\cr +#' Indicates the functions that create the statistic. See details below. +#' @param location ([`formula-list-selector`][syntax])\cr +#' Indicates the location the new statistics are placed. +#' The values must be one of `c("label", "level", "missing")`. +#' When `"label"`, a single statistic +#' is placed on the variable label row. When `"level"` the statistics are placed +#' on the variable level rows. The length of the vector of statistics returned from the +#' `fns` function must match the dimension of levels. Default is to place the +#' new statistics on the label row. #' #' @section Details: #' #' The returns from custom functions passed in `fns=` are required to follow a #' specified format. Each of these function will execute on a single variable. +#' #' 1. Each function must return a tibble or a vector. If a vector is returned, -#' it will be converted to a tibble with one column and number of rows equal -#' to the length of the vector. -#' 1. When `location = "label"`, the returned statistic from the custom function -#' must be a tibble with one row. When `location = "level"` the tibble must have -#' the same number of rows as there are levels in the variable (excluding the -#' row for unknown values). +#' it will be converted to a tibble with one column and number of rows equal +#' to the length of the vector. +#' +#' 1. When `location='label'`, the returned statistic from the custom function +#' must be a tibble with one row. When `location='level'` the tibble must have +#' the same number of rows as there are levels in the variable (excluding the +#' row for unknown values). +#' #' 1. Each function may take the following arguments: `foo(data, variable, by, tbl, ...)` #' - `data=` is the input data frame passed to `tbl_summary()` #' - `variable=` is a string indicating the variable to perform the calculation on. This is the variable in the label column of the table. #' - `by=` is a string indicating the by variable from `tbl_summary=`, if present #' - `tbl=` the original `tbl_summary()`/`tbl_svysummary()` object is also available to utilize #' -#' The user-defined does not need to utilize each of these inputs. It's +#' The user-defined function does not need to utilize each of these inputs. It's #' encouraged the user-defined function accept `...` as each of the arguments #' *will* be passed to the function, even if not all inputs are utilized by #' the user's function, e.g. `foo(data, variable, by, ...)` @@ -46,50 +49,44 @@ #' p-value formatting will be applied, and you may take advantage of subsequent #' p-value formatting functions, such as `bold_p()` or `add_q()`. #' -#' To access the continuous variable in a `tbl_continuous()` table, use -#' `tbl$inputs$variable`. -#' #' @export -#' @seealso Review [list, formula, and selector syntax][syntax] used throughout gtsummary +#' @return A 'gtsummary' of the same class as the input +#' #' @examples -#' \donttest{ -#' library(dplyr, warn.conflicts = FALSE) -#' library(stringr) #' # Example 1 ---------------------------------- #' # fn returns t-test pvalue #' my_ttest <- function(data, variable, by, ...) { #' t.test(data[[variable]] ~ as.factor(data[[by]]))$p.value #' } #' -#' add_stat_ex1 <- -#' trial %>% -#' select(trt, age, marker) %>% -#' tbl_summary(by = trt, missing = "no") %>% -#' add_stat(fns = everything() ~ my_ttest) %>% -#' modify_header( -#' list( -#' add_stat_1 ~ "**p-value**", -#' all_stat_cols() ~ "**{level}**" -#' ) -#' ) +#' trial |> +#' tbl_summary( +#' by = trt, +#' include = c(trt, age, marker), +#' missing = "no" +#' ) |> +#' add_stat(fns = everything() ~ my_ttest) |> +#' modify_header(add_stat_1 = "**p-value**", all_stat_cols() ~ "**{level}**") #' #' # Example 2 ---------------------------------- #' # fn returns t-test test statistic and pvalue #' my_ttest2 <- function(data, variable, by, ...) { -#' t.test(data[[variable]] ~ as.factor(data[[by]])) %>% +#' t.test(data[[variable]] ~ as.factor(data[[by]])) |> #' broom::tidy() %>% -#' mutate( -#' stat = str_glue("t={style_sigfig(statistic)}, {style_pvalue(p.value, prepend_p = TRUE)}") +#' dplyr::mutate( +#' stat = glue::glue("t={style_sigfig(statistic)}, {style_pvalue(p.value, prepend_p = TRUE)}") #' ) %>% -#' pull(stat) +#' dplyr::pull(stat) #' } #' -#' add_stat_ex2 <- -#' trial %>% -#' select(trt, age, marker) %>% -#' tbl_summary(by = trt, missing = "no") %>% -#' add_stat(fns = everything() ~ my_ttest2) %>% -#' modify_header(add_stat_1 ~ "**Treatment Comparison**") +#' trial |> +#' tbl_summary( +#' by = trt, +#' include = c(trt, age, marker), +#' missing = "no" +#' ) |> +#' add_stat(fns = everything() ~ my_ttest2) |> +#' modify_header(add_stat_1 = "**Treatment Comparison**") #' #' # Example 3 ---------------------------------- #' # return test statistic and p-value is separate columns @@ -99,170 +96,107 @@ #' select(statistic, p.value) #' } #' -#' add_stat_ex3 <- -#' trial %>% -#' select(trt, age, marker) %>% -#' tbl_summary(by = trt, missing = "no") %>% -#' add_stat(fns = everything() ~ my_ttest3) %>% -#' modify_header( -#' list( -#' statistic ~ "**t-statistic**", -#' p.value ~ "**p-value**" -#' ) -#' ) %>% -#' modify_fmt_fun( -#' list( -#' statistic ~ style_sigfig, -#' p.value ~ style_pvalue -#' ) -#' ) -#' } -#' @section Example Output: -#' \if{html}{Example 1} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_stat_ex1.png", width = "60")` -#' }} -#' -#' \if{html}{Example 2} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_stat_ex2.png", width = "60")` -#' }} -#' -#' \if{html}{Example 3} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_stat_ex3.png", width = "60")` -#' }} - -add_stat <- function(x, fns, location = NULL, ...) { +#' trial |> +#' tbl_summary( +#' by = trt, +#' include = c(trt, age, marker), +#' missing = "no" +#' ) |> +#' add_stat(fns = everything() ~ my_ttest3) |> +#' modify_header(statistic = "**t-statistic**", p.value = "**p-value**") |> +#' modify_fmt_fun(statistic = styfn_sigfig(), p.value = styfn_pvalue(digits = 2)) +add_stat <- function(x, fns, location = everything() ~ "label") { + set_cli_abort_call() updated_call_list <- c(x$call_list, list(add_stat = match.call())) - # checking inputs ------------------------------------------------------------ - .assert_class(x, c("tbl_summary", "tbl_svysummary", "tbl_continuous")) - # deprecated arguments ------------------------------------------------------- - dots <- rlang::dots_list(...) - dep_args <- - list( - fmt_fun = list("gtsummary::add_stat(fmt_fun=)", "modify_fmt_fun()"), - header = list("gtsummary::add_stat(header=)", "modify_header()"), - footnote = list("gtsummary::add_stat(footnote=)", "modify_footnote()"), - new_col_name = list("gtsummary::add_stat(new_col_name=)", NULL) - ) - purrr::iwalk( - dep_args, - function(.x, .y) { - if (!is.null(dots[[.y]])) { - lifecycle::deprecate_stop(when = "1.4.0", what = .x[[1]], with = .x[[2]]) - } - } - ) + # checking inputs ------------------------------------------------------------ + check_not_missing(x) + check_not_missing(fns) + check_class(x, c("tbl_summary", "tbl_svysummary", "tbl_continuous")) # convert to named lists ----------------------------------------------------- - if (rlang::is_string(location)) { - lifecycle::deprecate_stop( - "1.4.0", - "gtsummary::add_stat(location = 'must be a formula list, e.g. `everything() ~ \"label\"`,')" - ) - location <- inject(everything() ~ !!location) - } - location <- - .formula_list_to_named_list( - x = location, - data = switch(class(x)[1], - "tbl_summary" = select(x$inputs$data, any_of(x$meta_data$variable)), - "tbl_svysummary" = select(x$inputs$data$variables, any_of(x$meta_data$variable)) - ), - var_info = x$table_body, - arg_name = "location", - type_check = chuck(type_check, "is_string", "fn"), - type_check_msg = chuck(type_check, "is_string", "msg") - ) - imap( - location, - ~ switch(!is_string(.x) || !.x %in% c("label", "level", "missing"), - abort("RHS of `location=` formulas must be one of 'label', 'level', or 'missing'") - ) + cards::process_formula_selectors( + select_prep(x$table_body), + location = location, + fns = fns + ) + cards::fill_formula_selectors( + select_prep(x$table_body), + location = eval(formals(gtsummary::add_stat)[["location"]]) + ) + cards::check_list_elements( + x = location, + predicate = \(x) is_string(x) && x %in% c("label", "level", "missing"), + error_msg = "The element values for the {.arg location} argument + must be one of {.val {c('label', 'level', 'missing')}}." + ) + cards::check_list_elements( + x = fns, + predicate = \(x) is.function(x), + error_msg = "The element values for the {.arg fns} argument must be a {.cls function}." ) - - fns <- - .formula_list_to_named_list( - x = fns, - data = switch(class(x)[1], - "tbl_summary" = select(x$inputs$data, any_of(x$meta_data$variable)), - "tbl_svysummary" = select(x$inputs$data$variables, any_of(x$meta_data$variable)) - ), - var_info = x$table_body, - arg_name = "fns", - type_check = chuck(type_check, "is_function", "fn"), - type_check_msg = chuck(type_check, "is_function", "msg") - ) # setting new column name ---------------------------------------------------- stat_col_name <- - select(x$table_body, dplyr::matches("^add_stat_\\d*[1-9]\\d*$")) %>% - names() %>% + dplyr::select(x$table_body, dplyr::matches("^add_stat_\\d*[1-9]\\d*$")) |> + names() |> length() %>% - { - paste0("add_stat_", . + 1) - } + {paste0("add_stat_", . + 1)} # styler: off # calculating statistics ----------------------------------------------------- df_new_stat <- - tibble(variable = names(fns)) %>% - left_join(x$meta_data %>% select("variable", "summary_type"), - by = "variable" - ) %>% - mutate( - row_type = map_chr(.data$variable, ~ location[[.x]] %||% "label"), + dplyr::tibble(variable = names(fns)) |> + dplyr::mutate( + summary_type = map_chr(.data$variable, ~ x$inputs$type[[.x]]), + row_type = map_chr(.data$variable, ~ location[[.x]]), label = map2( .data$variable, .data$row_type, - ~ filter(x$table_body, .data$variable == .x, .data$row_type == .y)$label + ~ dplyr::filter(x$table_body, .data$variable == .x, .data$row_type == .y)$label ) - ) %>% + ) |> mutate( - df_add_stats = purrr::imap(fns, ~ eval_fn_safe(tbl = x, variable = .y, fn = .x)) - ) %>% + df_add_stats = + imap( + fns, + ~ eval_fn_safe(tbl = x, variable = .y, fn = .x) # TODO: UPDATE THIS NOW! + ) + ) |> select(-"summary_type") # converting returned statistics to a tibble if not already ------------------ df_new_stat$df_add_stats <- - df_new_stat$df_add_stats %>% - map(~ switch(is.data.frame(.x), - .x - ) %||% tibble(!!stat_col_name := .x)) + df_new_stat$df_add_stats |> + map(~ switch(is.data.frame(.x), .x) %||% dplyr::tibble(!!stat_col_name := .x)) # styler: off # check dims of calculated statistics ---------------------------------------- - purrr::pwalk( + pmap( list(df_new_stat$variable, df_new_stat$label, df_new_stat$df_add_stats), function(variable, label, df_add_stats) { if (nrow(df_add_stats) != length(label)) { - glue( - "Dimension of '{variable}' and the added statistic do not match. ", - "Expecting statistic/data frame to be length/no. rows {length(label)}." - ) %>% - abort() + cli::cli_abort( + c("Dimension of {.val {variable}} and the added statistic do not match.", + i = "Expecting statistic/data frame to be length/no. rows {.val {length(label)}}."), + call = get_cli_abort_call() + ) } } ) # check new column names do not exist in `x$table_body` - new_col_names <- bind_rows(df_new_stat$df_add_stats) %>% names() + new_col_names <- dplyr::bind_rows(df_new_stat$df_add_stats) |> names() if (any(new_col_names %in% names(x$table_body))) { - paste( - "Cannot add new column that already exist in gtsummary table:", - "{.field {quoted_list(new_col_names %in% intersect(names(x$table_body)))}}" - ) %>% - abort() + cli::cli_abort( + "Cannot add new column that already exist in {.cls gtsummary} table: + {.val {new_col_names |> intersect(names(x$table_body))}}.", + call = get_cli_abort_call() + ) } # merging new columns with `x$table_body` ------------------------------------ - x <- - x %>% + x <- x |> modify_table_body( - left_join, - df_new_stat %>% tidyr::unnest(cols = c("label", "df_add_stats")), + dplyr::left_join, + df_new_stat |> tidyr::unnest(cols = c("label", "df_add_stats")), by = c("variable", "row_type", "label") ) %>% # showing all new columns @@ -273,12 +207,12 @@ add_stat <- function(x, fns, location = NULL, ...) { # assigning a default fmt_fun modify_table_styling( columns = c(where(is.numeric) & all_of(new_col_names)), - fmt_fun = function(x) style_sigfig(x, digits = 3) - ) %>% + fmt_fun = styfn_sigfig(digits = 3) + ) |> # if a numeric column is called 'p.value' or 'q.value', giving p-value default formatting modify_table_styling( - columns = c(where(is.numeric) & any_of(c("p.value", "q.value"))), - fmt_fun = get_theme_element("pkgwide-fn:pvalue_fun", default = style_pvalue) + columns = c(where(is.numeric) & any_of(intersect(c("p.value", "q.value"), new_col_names))), + fmt_fun = get_theme_element("pkgwide-fn:pvalue_fun", default = styfn_pvalue()) ) # return tbl_summary object -------------------------------------------------- @@ -290,35 +224,28 @@ add_stat <- function(x, fns, location = NULL, ...) { eval_fn_safe <- function(variable, tbl, fn) { - tryCatch( - withCallingHandlers( - { - # initializing to NA - stat <- NA_real_ - stat <- rlang::call2( - fn, - data = tbl$inputs$data, - variable = variable, - by = tbl$inputs$by, - tbl = tbl - ) %>% - eval() - }, - # printing warning and errors as message - warning = function(w) { - message(glue( - "There was an warning for variable '{variable}':\n ", as.character(w) - )) - invokeRestart("muffleWarning") - } - ), - error = function(e) { - message(glue( - "There was an error for variable '{variable}':\n", as.character(e) - )) - return(NA_real_) - } - ) + result <- + cards::eval_capture_conditions( + exec( + fn, + data = tbl$inputs$data, + variable = variable, + by = tbl$inputs$by, + tbl = tbl + ) + ) + + if (!is_empty(result[["warning"]])) { + cli::cli_inform( + c("There was a warning for variable {.val {variable}}", "!" = result[["warning"]]) + ) + } + if (!is_empty(result[["error"]])) { + cli::cli_inform( + c("There was a error for variable {.val {variable}}", "x" = result[["error"]]) + ) + } - stat + # return result + result[["result"]] %||% NA_real_ } diff --git a/R/add_stat_label.R b/R/add_stat_label.R index 021b3c278..dc2da416c 100644 --- a/R/add_stat_label.R +++ b/R/add_stat_label.R @@ -1,7 +1,19 @@ #' Add statistic labels #' -#' Adds labels describing the summary statistics presented for -#' each variable in the [tbl_summary] / [tbl_svysummary] table. +#' `r lifecycle::badge('questioning')`\cr +#' Adds or modifies labels describing the summary statistics presented for +#' each variable in a [`tbl_summary()`] table. +#' +#' @param x (`tbl_summary`)\cr +#' Object with class `'tbl_summary'` or with class `'tbl_svysummary'` +#' @param location (`string`)\cr +#' Location where statistic label will be included. +#' `"row"` (the default) to add the statistic label to the variable label row, +#' and `"column"` adds a column with the statistic label. +#' @param label ([`formula-list-selector`][syntax])\cr +#' indicates the updates to the statistic label, e.g. `label = all_categorical() ~ "No. (%)"`. +#' When not specified, the default statistic labels are used. +#' @inheritParams rlang::args_dots_empty #' #' @section Tips: #' @@ -20,8 +32,8 @@ #' For example, the following two tables merge properly #' #' ```r -#' tbl1 <- trial %>% select(age, grade) %>% tbl_summary() %>% add_stat_label() -#' tbl2 <- lm(marker ~ age + grade, trial) %>% tbl_regression() +#' tbl1 <- trial %>% select(age, grade) |> tbl_summary() |> add_stat_label() +#' tbl2 <- lm(marker ~ age + grade, trial) |> tbl_regression() #' #' tbl_merge(list(tbl1, tbl2)) #' ``` @@ -33,176 +45,180 @@ #' output for categorical variables identical what was produced without #' a `"add_stat_label()"` function call. #' -#' @param x Object with class `tbl_summary` from the [tbl_summary] function -#' or with class `tbl_svysummary` from the [tbl_svysummary] function -#' @param location location where statistic label will be included. -#' `"row"` (the default) to add the statistic label to the variable label row, -#' and `"column"` adds a column with the statistic label. -#' @param label a list of formulas or a single formula updating the statistic -#' label, e.g. `label = all_categorical() ~ "No. (%)"` -#' @family tbl_summary tools -#' @family tbl_svysummary tools -#' @seealso Review [list, formula, and selector syntax][syntax] used throughout gtsummary #' @author Daniel D. Sjoberg -#' @export +#' @name add_stat_label #' @return A `tbl_summary` or `tbl_svysummary` object +#' #' @examples -#' \donttest{ -#' tbl <- trial %>% -#' dplyr::select(trt, age, grade, response) %>% +#' tbl <- trial |> +#' dplyr::select(trt, age, grade, response) |> #' tbl_summary(by = trt) #' #' # Example 1 ---------------------------------- #' # Add statistic presented to the variable label row -#' add_stat_label_ex1 <- -#' tbl %>% +#' tbl |> #' add_stat_label( #' # update default statistic label for continuous variables #' label = all_continuous() ~ "med. (iqr)" #' ) #' #' # Example 2 ---------------------------------- -#' add_stat_label_ex2 <- -#' tbl %>% +#' tbl |> #' add_stat_label( #' # add a new column with statistic labels #' location = "column" #' ) #' #' # Example 3 ---------------------------------- -#' add_stat_label_ex3 <- -#' trial %>% -#' select(age, grade, trt) %>% +#' trial |> +#' select(age, grade, trt) |> #' tbl_summary( #' by = trt, #' type = all_continuous() ~ "continuous2", -#' statistic = all_continuous() ~ c("{mean} ({sd})", "{min} - {max}"), -#' ) %>% -#' add_stat_label(label = age ~ c("Mean (SD)", "Min - Max")) -#' } -#' @section Example Output: -#' \if{html}{Example 1} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_stat_label_ex1.png", width = "60")` -#' }} -#' -#' \if{html}{Example 2} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_stat_label_ex2.png", width = "60")` -#' }} -#' -#' \if{html}{Example 3} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_stat_label_ex3.png", width = "45")` -#' }} +#' statistic = all_continuous() ~ c("{median} ({p25}, {p75})", "{min} - {max}"), +#' ) |> +#' add_stat_label(label = age ~ c("IQR", "Range")) +NULL + +#' @export +#' @rdname add_stat_label +add_stat_label <- function(x, ...) { + check_not_missing(x) + check_class(x, "gtsummary") + UseMethod("add_stat_label") +} -add_stat_label <- function(x, location = NULL, label = NULL) { +#' @export +#' @rdname add_stat_label +add_stat_label.tbl_summary <- function(x, location = c("row", "column"), label = NULL, ...) { + set_cli_abort_call() + check_dots_empty() updated_call_list <- c(x$call_list, list(add_stat_label = match.call())) - # checking inputs ------------------------------------------------------------ - .assert_class(x, c("tbl_summary", "tbl_svysummary")) - # if `add_stat_label()` already run, return unmodified ----------------------- + # check inputs --------------------------------------------------------------- + check_not_missing(label) if ("add_stat_label" %in% names(x$call_list)) { - cli_alert_info("{.code add_stat_label()} has previously been applied. Returning {.field gtsummary} table unaltered.") + cli::cli_inform("{.code add_stat_label()} has previously been applied. Returning {.pkg gtsummary} table unaltered.") return(x) } - # setting defaults ----------------------------------------------------------- - location <- location %||% - get_theme_element("add_stat_label-arg:location") %>% - match.arg(choices = c("row", "column")) + # process arguments ---------------------------------------------------------- + if (missing(location)) { + location <- get_theme_element("add_stat_label-arg:location", default = location) + location <- arg_match(location) + } - # processing statistics label ------------------------------------------------ - # converting input to named list - label <- - .formula_list_to_named_list( - x = label, - data = - switch(!is_survey(x$inputs$data), - x$inputs$data[x$meta_data$variable] - ) %||% - x$inputs$data$variables[x$meta_data$variable], - var_info = meta_data_to_var_info(x$meta_data), - arg_name = "label", - type_check = chuck(type_check, "is_character", "fn"), - type_check_msg = chuck(type_check, "is_character", "msg") - ) + cards::process_formula_selectors( + select_prep(x$table_body), + label = label + ) + # check the label values are all string (except continuous2) + imap( + label[intersect(names(label), x$inputs$type |> discard(~.x == "continuous2") |> names())], + function(.x, .y) { + if (!is_string(.x)) { + cli::cli_abort( + "Elements of the {.arg label} argument for variable {.val {.y}} must be a string of length 1.", + all = get_cli_abort_call() + ) + } + } + ) + updated_variables <- names(label) # variable whose stat labels were updated from the defaults + cards::fill_formula_selectors( + select_prep(x$table_body), + label = .add_stat_label_default_label_arg(x, statistic = x$inputs$statistic) + ) - # stat_label column + # create df_stat_label to merge into `.$table_body`, + # (does not include continuous2 variables) df_stat_label <- - x$meta_data %>% - filter(!.data$summary_type %in% "continuous2") %>% - select("variable", "stat_label") %>% - tibble::deframe() %>% - # updating the default values with values in label - purrr::imap_chr(~ label[[.y]] %||% .x) %>% - tibble::enframe("variable", "stat_label") - - # adding stat_label to `.$table_body` - x <- - x %>% - modify_table_body( - ~ .x %>% - left_join(df_stat_label, by = "variable") %>% - dplyr::relocate("stat_label", .after = "label") %>% - mutate( - # adding in "n" for missing rows, and header - stat_label = case_when( - .data$row_type == "missing" ~ "n", - TRUE ~ .data$stat_label - ), - # setting some rows to NA depending on output type - stat_label = - switch(location, - "row" = ifelse(.data$row_type %in% "label", .data$stat_label, NA), - "column" = - ifelse( - .data$row_type %in% "label" & .data$var_type %in% "categorical", - NA, .data$stat_label - ) - ) + x$table_body[c("variable", "var_type")] |> + dplyr::distinct() |> + dplyr::filter(!.data$var_type %in% "continuous2") |> + dplyr::mutate( + stat_label = map_chr(.data$variable, ~label[[.x]]), + row_type = + ifelse( + .env$location %in% "column" & .data$var_type %in% "categorical", + "level", + "label" ) - ) %>% - # removing stat label footnote - modify_footnote(all_stat_cols() ~ NA_character_) + ) + + if (location %in% "column") { + df_stat_label <- + df_stat_label %>% + dplyr::bind_rows( + dplyr::select(., "variable", "var_type") |> + dplyr::mutate( + row_type = "missing", + stat_label = + glue::glue( + x$inputs$missing_stat, + .envir = list(N_obs = "N", + N_miss = "n", + N_nonmiss = "N - n", + p_miss = "p", + p_nonmiss = "1 - p") + ) + ) + ) + } + + # update the label column for continuous2 variables -------------------------- + cont2_vars_with_new_label <- names(keep(x$inputs$type, ~ .x == "continuous2")) |> intersect(updated_variables) + if (!is_empty(cont2_vars_with_new_label)) { + # first, check the dimension of the passed value + walk( + cont2_vars_with_new_label, + function(.x) { + if (!is.character(label[[.x]]) || length(label[[.x]]) != length(x$inputs$statistic[[.x]])) { + cli::cli_abort( + "The element of the {.arg label} argument for variable {.val {.x}} + must be a string of length {.val {length(x$inputs$statistic[[.x]])}}.", + call = get_cli_abort_call() + ) + } + } + ) - # updating `continuous2` stat labels if they exist --------------------------- - df_con2_update <- - x$meta_data %>% - filter(.data$summary_type %in% "continuous2") %>% - select("variable", "summary_type", "stat_label") %>% - mutate( - stat_label = map2(.data$stat_label, .data$variable, ~ label[[.y]] %||% .x), - row_type = "level" - ) %>% - tidyr::unnest("stat_label") %>% - dplyr::rename(var_type = "summary_type", label = "stat_label") - rows_to_update <- - x$table_body$variable %in% unique(df_con2_update$variable) & - x$table_body$var_type %in% "continuous2" & - x$table_body$row_type %in% "level" - if (nrow(df_con2_update) != sum(rows_to_update)) { - abort("`label=` dimensions do not match for type `continuous2` variables.") + # now update the label column of .$table_body + for (variable in cont2_vars_with_new_label) { + x$table_body$label[x$table_body$variable %in% variable & x$table_body$row_type %in% "level"] <- + label[[variable]] + } } - x$table_body$label[which(rows_to_update)] <- df_con2_update$label - # if adding stat labels to row, then adding merge instructions --------------- - if (location == "row") { - x <- + # add df_stat_label to `.$table_body` ---------------------------------------- + x <- x |> + modify_table_body( + ~ dplyr::left_join( + .x, + df_stat_label, + by = c("variable", "var_type", "row_type") + ) |> + dplyr::relocate("stat_label", .after = "label") + ) |> + modify_table_styling( + columns = "stat_label", + hide = location %in% "row", + label = paste0("**", translate_text("Statistic"), "**") + ) |> + # removing stat footnote, since it's in the table now + modify_table_styling( + columns = all_stat_cols(), + footnote = NA_character_ + ) + + if (location %in% "row") { + x <- x |> modify_table_styling( - x, columns = "label", rows = !is.na(.data$stat_label), cols_merge_pattern = "{label}, {stat_label}" ) } - # unhiding column if requested ----------------------------------------------- - else if (location == "column") { - x <- modify_header(x, stat_label ~ paste0("**", translate_text("Statistic"), "**")) - } # keeping track of all functions previously run ------------------------------ # fill in the Ns in the header table modify_stat_* columns @@ -211,3 +227,25 @@ add_stat_label <- function(x, location = NULL, label = NULL) { x } + +.add_stat_label_default_label_arg <- function(x, statistic) { + statistic |> + imap( + function(full_statistic, variable) { + map_chr( + full_statistic, + function(sub_statistic) { + eval_tidy( + expr(glue::glue(gsub("\\{(p|p_miss|p_nonmiss|p_unweighted)\\}%", "{\\1}", x = sub_statistic))), + cards::get_ard_statistics( + x$cards[[1]] |> + dplyr::filter(.data$variable %in% variable) |> + dplyr::distinct(.data$stat_name, .data$stat_label), + .column = "stat_label" + ) + ) + } + ) + } + ) +} diff --git a/R/as_gt.R b/R/as_gt.R index 9ef8eb2f1..f80fbfa3b 100644 --- a/R/as_gt.R +++ b/R/as_gt.R @@ -6,44 +6,30 @@ #' A user can use this function if they wish to add customized formatting #' available via the [gt package](https://gt.rstudio.com/index.html). #' -#' @description Review the -#' \href{https://www.danieldsjoberg.com/gtsummary/articles/tbl_summary.html#advanced}{tbl_summary vignette} -#' or -#' \href{https://www.danieldsjoberg.com/gtsummary/articles/tbl_regression.html#advanced}{tbl_regression vignette} -#' for detailed examples in the 'Advanced Customization' section. -#' -#' @param x Object created by a function from the gtsummary package -#' (e.g. [tbl_summary] or [tbl_regression]) +#' @param x An object of class `"gtsummary" #' @param include Commands to include in output. Input may be a vector of #' quoted or unquoted names. tidyselect and gtsummary select helper #' functions are also accepted. #' Default is `everything()`. #' @param return_calls Logical. Default is `FALSE`. If `TRUE`, the calls are returned #' as a list of expressions. -#' @param ... Arguments passed on to [gt::gt] +#' @param ... Arguments passed on to `gt::gt(...)` #' @return A `gt_tbl` object #' @family gtsummary output types #' @author Daniel D. Sjoberg #' @export #' @examples -#' # Example 1 ---------------------------------- -#' as_gt_ex1 <- -#' trial[c("trt", "age", "response", "grade")] %>% -#' tbl_summary(by = trt) %>% -#' as_gt() -#' @section Example Output: -#' -#' \if{html}{Example 1} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "as_gt_ex1.png", width = "50")` -#' }} - +#' # # Example 1 ---------------------------------- +#' # as_gt_ex1 <- +#' # trial[c("trt", "age", "response", "grade")] %>% +#' # tbl_summary(by = trt) %>% +#' # as_gt() as_gt <- function(x, include = everything(), return_calls = FALSE, ...) { - .assert_class(x, "gtsummary") + set_cli_abort_call() + check_class(x, "gtsummary") # running pre-conversion function, if present -------------------------------- - x <- do.call(get_theme_element("pkgwide-fun:pre_conversion", default = identity), list(x)) + # x <- do.call(get_theme_element("pkgwide-fun:pre_conversion", default = identity), list(x)) # merging column specified in `x$table_styling$cols_merge` ------------------- # UPDATE THIS WHEN `gt::cols_merge(rows=)` argument is added! @@ -58,7 +44,7 @@ as_gt <- function(x, include = everything(), return_calls = FALSE, ...) { # adding user-specified calls ------------------------------------------------ insert_expr_after <- get_theme_element("as_gt-lst:addl_cmds") gt_calls <- - purrr::reduce( + reduce( .x = seq_along(insert_expr_after), .f = function(x, y) { add_expr_after( @@ -72,12 +58,10 @@ as_gt <- function(x, include = everything(), return_calls = FALSE, ...) { ) # converting to character vector --------------------------------------------- - include <- - .select_to_varnames( - select = {{ include }}, - var_info = names(gt_calls), - arg_name = "include" - ) + cards::process_selectors( + data = vec_to_df(names(gt_calls)), + include = {{ include }} + ) # user cannot omit the first 'gt' command include <- "gt" %>% union(include) @@ -88,9 +72,7 @@ as_gt <- function(x, include = everything(), return_calls = FALSE, ...) { } # taking each gt function call, concatenating them with %>% separating them - gt_calls[include] %>% - c(parse_expr(.get_deprecated_option("gtsummary.as_gt.addl_cmds", default = "NULL"))) %>% - .eval_list_of_exprs() + .eval_list_of_exprs(gt_calls[include]) } # creating gt calls from table_styling ----------------------------------------- @@ -105,8 +87,9 @@ table_styling_to_gt_calls <- function(x, ...) { caption <- switch(!is.null(x$table_styling$caption), rlang::call2( - attr(x$table_styling$caption, "text_interpret"), - x$table_styling$caption + .fn = attr(x$table_styling$caption, "text_interpret"), + x$table_styling$caption, + .ns = "gt" ) ) gt_calls[["gt"]] <- @@ -137,9 +120,9 @@ table_styling_to_gt_calls <- function(x, ...) { df_cols_align <- x$table_styling$header %>% select("column", "align") %>% - group_by(.data$align) %>% - nest() %>% - mutate(cols = map(.data$data, ~ pull(.x, column))) + dplyr::group_by(.data$align) %>% + tidyr::nest() %>% + dplyr::mutate(cols = map(.data$data, ~ dplyr::pull(.x, column))) gt_calls[["cols_align"]] <- map( @@ -151,21 +134,20 @@ table_styling_to_gt_calls <- function(x, ...) { ) # indent --------------------------------------------------------------------- - df_indent <- x$table_styling$text_format %>% filter(.data$format_type == "indent") gt_calls[["indent"]] <- map( - seq_len(nrow(df_indent)), + seq_len(nrow(x$table_styling$indent)), ~ expr(gt::text_transform( locations = gt::cells_body( - columns = !!df_indent$column[[.x]], - rows = !!df_indent$row_numbers[[.x]] + columns = !!x$table_styling$indent$column[[.x]], + rows = !!x$table_styling$indent$row_numbers[[.x]] ), - fn = function(x) paste0("\U00A0\U00A0\U00A0\U00A0", x) + fn = function(x) paste0(!!paste(rep_len("\U00A0", x$table_styling$indent$n_spaces[[.x]]), collapse = ""), x) )) ) # indent2 -------------------------------------------------------------------- - df_indent2 <- x$table_styling$text_format %>% filter(.data$format_type == "indent2") + df_indent2 <- x$table_styling$text_format %>% dplyr::filter(.data$format_type == "indent2") gt_calls[["indent2"]] <- map( seq_len(nrow(df_indent2)), @@ -190,7 +172,7 @@ table_styling_to_gt_calls <- function(x, ...) { ) # tab_style_bold ------------------------------------------------------------- - df_bold <- x$table_styling$text_format %>% filter(.data$format_type == "bold") + df_bold <- x$table_styling$text_format %>% dplyr::filter(.data$format_type == "bold") gt_calls[["tab_style_bold"]] <- map( seq_len(nrow(df_bold)), @@ -204,7 +186,7 @@ table_styling_to_gt_calls <- function(x, ...) { ) # tab_style_italic ----------------------------------------------------------- - df_italic <- x$table_styling$text_format %>% filter(.data$format_type == "italic") + df_italic <- x$table_styling$text_format %>% dplyr::filter(.data$format_type == "italic") gt_calls[["tab_style_italic"]] <- map( seq_len(nrow(df_italic)), @@ -235,17 +217,17 @@ table_styling_to_gt_calls <- function(x, ...) { gt_calls[["tab_footnote"]] <- list() } else { df_footnotes <- - bind_rows( + dplyr::bind_rows( x$table_styling$footnote, x$table_styling$footnote_abbrev ) %>% - nest(data = c("column", "row_numbers")) %>% - rowwise() %>% - mutate( - columns = .data$data %>% pull("column") %>% unique() %>% list(), - rows = .data$data %>% pull("row_numbers") %>% unique() %>% list() + tidyr::nest(data = c("column", "row_numbers")) %>% + dplyr::rowwise() %>% + dplyr::mutate( + columns = .data$data %>% dplyr::pull("column") %>% unique() %>% list(), + rows = .data$data %>% dplyr::pull("row_numbers") %>% unique() %>% list() ) %>% - ungroup() + dplyr::ungroup() df_footnotes$footnote_exp <- map2( df_footnotes$text_interpret, @@ -284,17 +266,17 @@ table_styling_to_gt_calls <- function(x, ...) { # spanning_header ------------------------------------------------------------ df_spanning_header <- x$table_styling$header %>% - select("column", "interpret_spanning_header", "spanning_header") %>% - filter(!is.na(.data$spanning_header)) %>% - nest(cols = "column") %>% - mutate( + dplyr::select("column", "interpret_spanning_header", "spanning_header") %>% + dplyr::filter(!is.na(.data$spanning_header)) %>% + tidyr::nest(cols = "column") %>% + dplyr::mutate( spanning_header = map2( .data$interpret_spanning_header, .data$spanning_header, ~ call2(parse_expr(.x), .y) ), - cols = map(.data$cols, ~ pull(.x)) + cols = map(.data$cols, ~ dplyr::pull(.x)) ) %>% - select("spanning_header", "cols") + dplyr::select("spanning_header", "cols") gt_calls[["tab_spanner"]] <- map( diff --git a/R/as_tibble.R b/R/as_tibble.R index 1e697a95a..66fae3ec1 100644 --- a/R/as_tibble.R +++ b/R/as_tibble.R @@ -2,7 +2,7 @@ #' #' Function converts a gtsummary object to a tibble. #' -#' @inheritParams as_kable +#' @inheritParams as_gt #' @param col_labels Logical argument adding column labels to output tibble. #' Default is `TRUE`. #' @param fmt_missing Logical argument adding the missing value formats. @@ -29,8 +29,7 @@ NULL #' @rdname as_tibble.gtsummary as_tibble.gtsummary <- function(x, include = everything(), col_labels = TRUE, return_calls = FALSE, fmt_missing = FALSE, ...) { - check_dots_empty(error = function(e) inform(c(e$message, e$body))) - + set_cli_abort_call() # running pre-conversion function, if present -------------------------------- x <- do.call(get_theme_element("pkgwide-fun:pre_conversion", default = identity), list(x)) @@ -46,12 +45,10 @@ as_tibble.gtsummary <- function(x, include = everything(), col_labels = TRUE, ) # converting to character vector --------------------------------------------- - include <- - .select_to_varnames( - select = {{ include }}, - var_info = names(tibble_calls), - arg_name = "include" - ) + cards::process_selectors( + data = vec_to_df(names(tibble_calls)), + include = {{ include }} + ) # making list of commands to include ----------------------------------------- # this ensures list is in the same order as names(x$kable_calls) @@ -71,10 +68,12 @@ as_tibble.gtsummary <- function(x, include = everything(), col_labels = TRUE, #' @export #' @rdname as_tibble.gtsummary as.data.frame.gtsummary <- function(...) { + set_cli_abort_call() res <- as_tibble(...) - if (inherits(res, "data.frame")) + if (inherits(res, "data.frame")) { return(as.data.frame(res)) + } res } @@ -90,12 +89,12 @@ table_styling_to_tibble_calls <- function(x, col_labels = TRUE, fmt_missing = FA if ("groupname_col" %in% x$table_styling$header$column) { tibble_calls[["ungroup"]] <- list( - expr(group_by(.data$groupname_col)), - expr(mutate(groupname_col = ifelse(dplyr::row_number() == 1, + expr(dplyr::group_by(.data$groupname_col)), + expr(dplyr::mutate(groupname_col = ifelse(dplyr::row_number() == 1, as.character(.data$groupname_col), NA_character_ ))), - expr(ungroup()) + expr(dplyr::ungroup()) ) } @@ -110,7 +109,7 @@ table_styling_to_tibble_calls <- function(x, col_labels = TRUE, fmt_missing = FA map( seq_len(nrow(x$table_styling$cols_merge)), ~ expr( - mutate( + dplyr::mutate( !!x$table_styling$cols_merge$column[.x] := ifelse( dplyr::row_number() %in% !!x$table_styling$cols_merge$rows[[.x]], @@ -122,12 +121,12 @@ table_styling_to_tibble_calls <- function(x, col_labels = TRUE, fmt_missing = FA ) # tab_style_bold ------------------------------------------------------------- - df_bold <- x$table_styling$text_format %>% filter(.data$format_type == "bold") + df_bold <- x$table_styling$text_format %>% dplyr::filter(.data$format_type == "bold") tibble_calls[["tab_style_bold"]] <- map( seq_len(nrow(df_bold)), - ~ expr(mutate_at( + ~ expr(dplyr::mutate_at( gt::vars(!!!syms(df_bold$column[[.x]])), ~ ifelse(row_number() %in% !!df_bold$row_numbers[[.x]], paste0("__", ., "__"), . @@ -136,14 +135,14 @@ table_styling_to_tibble_calls <- function(x, col_labels = TRUE, fmt_missing = FA ) # tab_style_italic ------------------------------------------------------------- - df_italic <- x$table_styling$text_format %>% filter(.data$format_type == "italic") + df_italic <- x$table_styling$text_format %>% dplyr::filter(.data$format_type == "italic") tibble_calls[["tab_style_italic"]] <- map( seq_len(nrow(df_italic)), - ~ expr(mutate_at( + ~ expr(dplyr::mutate_at( gt::vars(!!!syms(df_italic$column[[.x]])), - ~ ifelse(row_number() %in% !!df_italic$row_numbers[[.x]], + ~ ifelse(dplyr::row_number() %in% !!df_italic$row_numbers[[.x]], paste0("_", ., "_"), . ) )) @@ -204,7 +203,8 @@ table_styling_to_tibble_calls <- function(x, col_labels = TRUE, fmt_missing = FA # apply formatting functions df_updated <- update_from[row_numbers, columns, drop = FALSE] %>% - purrr::map_dfc(~ fmt_fun(.x)) + map(~ fmt_fun(.x)) |> + dplyr::bind_cols() # convert underlying column to character if updated col is character for (v in columns) { @@ -213,7 +213,7 @@ table_styling_to_tibble_calls <- function(x, col_labels = TRUE, fmt_missing = FA } } - # udpate data and return + # update data and return data[row_numbers, columns, drop = FALSE] <- df_updated data diff --git a/R/assign_summary_digits.R b/R/assign_summary_digits.R new file mode 100644 index 000000000..fbe116cbf --- /dev/null +++ b/R/assign_summary_digits.R @@ -0,0 +1,157 @@ +#' Assign Default Digits +#' +#' Used to assign the default formatting for variables summarized with +#' `tbl_summary()`. +#' +#' @param data (`data.frame`)\cr +#' a data frame +#' @param statistic (`named list`)\cr +#' a named list; notably, _not_ a [`formula-list-selector`][syntax] +#' @param type (`named list`)\cr +#' a named list; notably, _not_ a [`formula-list-selector`][syntax] +#' @param digits (`named list`)\cr +#' a named list; notably, _not_ a [`formula-list-selector`][syntax]. +#' Default is `NULL` +#' +#' @return a named list +#' @export +#' +#' @examples +#' assign_summary_digits( +#' mtcars, +#' statistic = list(mpg = "{mean}"), +#' type = list(mpg = "continuous") +#' ) +assign_summary_digits <- function(data, statistic, type, digits = NULL) { + set_cli_abort_call() + # stats returned for all variables + lst_cat_summary_fns <- .categorical_summary_functions(c("n", "p")) + lst_all_fmt_fns <- + .categorical_summary_functions(c("N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss")) + + # extract the statistics + statistic <- lapply(statistic, function(x) .extract_glue_elements(x) |> unlist()) + + lapply( + names(statistic), + function(variable) { + # if user passed digits AND they've specified every statistic, use the passed value + # otherwise, we need to calculate the defaults, and later we can update with the pieces the user passed + if (!is.null(digits[[variable]])) { + # if a scalar or vector passed, convert it to a list + if (!is.list(digits[[variable]]) && is_vector(digits[[variable]])) { + digits[[variable]] <- as.list(digits[[variable]]) + } + + # if user-passed value is not named, repeat the passed value to the length of 'statistic' + if (!is_named(digits[[variable]])) { + digits[[variable]] <- rep_named(statistic[[variable]], digits[[variable]]) + } + + # convert integers to a proper function + digits[[variable]] <- .convert_integer_to_fmt_fn(digits[[variable]]) + + # if the passed value fully specifies the formatting for each 'statistic', + # then return it. Otherwise, the remaining stat will be filled below + if (setequal(statistic[[variable]], names(digits[[variable]]))) { + return(lst_all_fmt_fns |> utils::modifyList(digits[[variable]])) + } + } + + if (type[[variable]] %in% c("categorical", "dichotomous")) { + return( + c(lst_cat_summary_fns, lst_all_fmt_fns) |> + utils::modifyList(digits[[variable]] %||% list()) + ) + } + + if (type[[variable]] %in% c("continuous", "continuous2")) { + return( + rep_named( + statistic[[variable]], + list(.guess_continuous_summary_digits(data[[variable]])) + ) |> + utils::modifyList(lst_all_fmt_fns) |> + utils::modifyList(digits[[variable]] %||% list()) + ) + } + } + ) |> + stats::setNames(names(statistic)) +} + +.convert_integer_to_fmt_fn <- function(x) { + imap( + x, + function(value, stat_name) { + # if not an integer, simply return the value + if (!is_integerish(value)) { + return(value) + } + # if an integer is passed for a percentage, process stat with style_percent() + if (stat_name %in% c("p", "p_miss", "p_nonmiss", "p_unweighted")) { + return(styfn_percent(digits = value)) + } + # otherwise, use style_number() to style number + return(styfn_number(digits = value)) + } + ) +} + +.guess_continuous_summary_digits <- function(x) { + # if all missing, return 0 + if (all(is.na(x))) { + return(styfn_number(digits = 0L)) + } + + # if class is integer, then round everything to nearest integer + if (inherits(x, "integer")) { + return(styfn_number(digits = 0L)) + } + + # if it's a date or time, then convert the result to character + if (is_date_time(x)) { + return(as.character) + } + + # otherwise guess the number of digits to use based on the spread + # calculate the spread of the variable + tryCatch( + { + var_spread <- + stats::quantile(x, probs = c(0.95), na.rm = TRUE) - + stats::quantile(x, probs = c(0.05), na.rm = TRUE) + + styfn_number( + digits = + dplyr::case_when( + var_spread < 0.01 ~ 4L, + var_spread >= 0.01 & var_spread < 0.1 ~ 3L, + var_spread >= 0.1 & var_spread < 10 ~ 2L, + var_spread >= 10 & var_spread < 20 ~ 1L, + var_spread >= 20 ~ 0L + ) + ) + }, + error = function(e) 0L + ) +} + +.categorical_summary_functions <- + function(statistics = c( + " + N", "N_obs", "N_miss", "N_nonmiss", "n_unweighted", "N_unweighted", + "p_miss", "p_nonmiss", "p_unweighted" + )) { + lst_defaults <- + c( + c("n", "N", "N_obs", "N_miss", "N_nonmiss", "n_unweighted", "N_unweighted") |> + intersect(statistics) |> + rep_named(list(styfn_number())), + c("p", "p_miss", "p_nonmiss", "p_unweighted") |> + intersect(statistics) |> + rep_named(list(styfn_percent())) + ) + + lst_defaults + } diff --git a/R/assign_summary_type.R b/R/assign_summary_type.R new file mode 100644 index 000000000..bdaeeaf52 --- /dev/null +++ b/R/assign_summary_type.R @@ -0,0 +1,117 @@ +#' Assign Default Summary Type +#' +#' Function inspects data and assigns a summary type when not specified +#' in the `type` argument. +#' +#' @param data (`data.frame`)\cr +#' a data frame +#' @param variables (`character`)\cr +#' character vector of column names in `data` +#' @param value (`named list`)\cr +#' named list of values to show for dichotomous variables, where +#' the names are the variables +#' @param type (`named list`)\cr +#' named list of summary types, where names are the variables +#' @param cat_threshold (`integer`)\cr +#' for base R numeric classes with fewer levels than +#' this threshold will default to a categorical summary. Default is `10L` +#' +#' @return named list +#' @export +#' +#' @examples +#' assign_summary_type( +#' data = trial, +#' variables = c("age", "grade", "response"), +#' value = NULL +#' ) +assign_summary_type <- function(data, variables, value, type = NULL, cat_threshold = 10L) { + set_cli_abort_call() + # base classes that can be summarized as continuous + base_numeric_classes <- c("numeric", "integer", "difftime", "Date", "POSIXt", "double") + + # assign a type + type <- + map( + variables, + function(variable) { + # return specified type if passed by user + if (!is.null(type[[variable]])) { + return(type[[variable]]) + } + + # if user supplied a dichotomous value, make it dichotomous + if (!is.null(value[[variable]])) { + return("dichotomous") + } + + # if a type with a default dichotomous value, make it dichotomous + if (!is.null(.get_default_dichotomous_value(data[[variable]]))) { + return("dichotomous") + } + + # factors are categorical + if (inherits(data[[variable]], "factor")) { + return("categorical") + } + + # if all missing, the continuous + if (all(is.na(data[[variable]]))) { + return("continuous") + } + + # characters are categorical + if (inherits(data[[variable]], "character")) { + return("categorical") + } + + # numeric variables with fewer than 'cat_threshold' levels will be categorical + if (inherits(data[[variable]], base_numeric_classes) && + length(unique(stats::na.omit(data[[variable]]))) < cat_threshold) { + return("categorical") + } + + # all other numeric classes are continuous + if (inherits(data[[variable]], base_numeric_classes)) { + return(get_theme_element("tbl_summary-str:default_con_type", default = "continuous")) + } + + # finally, summarize as categorical if none of the above criteria were met + return("categorical") + } + ) |> + stats::setNames(variables) + + # return type + type +} + +.get_default_dichotomous_value <- function(x) { + # logical variables are dichotomous + if (inherits(x, "logical")) { + return(TRUE) + } + + # numeric variables that are 0 and 1 only, will be dichotomous + if (inherits(x, c("integer", "numeric")) && + setequal(unique(stats::na.omit(x)), c(0, 1))) { + return(stats::na.omit(x) |> unique() |> sort() |> dplyr::last()) + } + + # factor variables that are "No" and "Yes" only, will be dichotomous + if (inherits(x, "factor") && + length(levels(x)) == 2L && + setequal(toupper(levels(x)), c("NO", "YES"))) { + return(levels(x)[toupper(levels(x)) %in% "YES"]) + } + + # character variables that are "No" and "Yes" only, will be dichotomous + if (inherits(x, "character") && + setequal(toupper(stats::na.omit(x)), c("NO", "YES")) && + length(unique(stats::na.omit(x))) == 2L) { + return(unique(x)[toupper(unique(x)) %in% "YES"]) + } + + # otherwise, return NULL + NULL +} diff --git a/R/assign_tests.R b/R/assign_tests.R new file mode 100644 index 000000000..a7b5aa70c --- /dev/null +++ b/R/assign_tests.R @@ -0,0 +1,229 @@ +#' Assign Test +#' +#' This function is used to assign default tests for `add_p()` +#' and `add_difference()`. +#' +#' @param x (`gtsummary`)\cr +#' a table of class `'gtsummary'` +#' @param test (named `list`)\cr +#' a named list of tests. +#' @param group (`string`)\cr +#' a variable name indicating the grouping column for correlated data. +#' Default is `NULL`. +#' @param adj.vars (`character`)\cr +#' Variables to include in adjusted calculations (e.g. in ANCOVA models). +#' @param include (`character`)\cr +#' Character vector of column names to assign a default tests. +#' @param calling_fun (`string`)\cr +#' Must be one of `'add_p'` and `'add_difference'`. Depending on the context, +#' different defaults are set. +#' @inheritParams cli::cli_abort +#' +#' @return A table of class `'gtsummary'` +#' @name assign_tests +#' +#' @examples +#' trial |> +#' tbl_summary( +#' by = trt, +#' include = c(age, stage) +#' ) |> +#' assign_tests(include = c("age", "stage"), calling_fun = "add_p") +NULL + +#' @rdname assign_tests +#' @export +assign_tests <- function(x, ...) { + check_not_missing(x) + check_class(x, "gtsummary") + UseMethod("assign_tests") +} + +#' @rdname assign_tests +#' @export +assign_tests.tbl_summary <- function(x, test = NULL, group = NULL, adj.vars = NULL, include, + calling_fun = c("add_p", "add_difference"), ...) { + set_cli_abort_call() + # processing inputs ---------------------------------------------------------- + calling_fun <- arg_match(calling_fun) + data <- x$inputs$data + by <- x$inputs$by + summary_type <- x$inputs$type + + # loop over the variables and assign default test if not provided by user + lapply( + include, + function(variable) { + if (is.null(test[[variable]])) { + test[[variable]] <- + switch(calling_fun, + "add_p" = + .add_p_tbl_summary_default_test(data, + variable = variable, + by = by, group = group, adj.vars = adj.vars, + summary_type = summary_type[[variable]] + ), + "add_difference" = + .add_difference_tbl_summary_default_test(data, + variable = variable, + by = by, group = group, adj.vars = adj.vars, + summary_type = summary_type[[variable]] + ) + ) + } + + if (is.null(test[[variable]])) { + cli::cli_abort( + c( + "There is no default test set for column {.val {variable}}.", + i = "Set a value in the {.arg test} argument for column {.val {variable}} or exclude with {.code include = -{variable}}." + ), + call = get_cli_abort_call() + ) + } + + test[[variable]] <- + .process_test_argument_value( + test = test[[variable]], + class = "tbl_summary", + calling_fun = calling_fun + ) + } + ) |> + stats::setNames(include) +} + + +.process_test_argument_value <- function(test, class, calling_fun) { + # subset the data frame + df_tests <- + df_add_p_tests |> + dplyr::filter(.data$class %in% .env$class, .data[[calling_fun]]) + + # if the test is character and it's an internal test + if (is.character(test) && test %in% df_tests$test_name) { + test_to_return <- df_tests$fun_to_run[df_tests$test_name %in% test][[1]] |> eval() + attr(test_to_return, "test_name") <- df_tests$test_name[df_tests$test_name %in% test] + return(test_to_return) + } + + # if the test is character and it's NOT an internal test + if (is.character(test)) { + return(eval(parse_expr(test), envir = attr(test, ".Environment"))) + } + + # if passed test is a function and it's an internal test + internal_test_index <- df_tests$test_fun |> + map_lgl(~ identical_no_attr(eval(.x), test)) |> + which() + if (is.function(test) && !is_empty(internal_test_index)) { + test_to_return <- df_tests$fun_to_run[[internal_test_index]] |> eval() + attr(test_to_return, "test_name") <- df_tests$test_name[internal_test_index] + return(test_to_return) + } + + # otherwise, if it's a function, return it + return(eval(test, envir = attr(test, ".Environment"))) +} + +# compare after removing attributes +identical_no_attr <- function(x, y) { + tryCatch( + { + attributes(x) <- NULL + attributes(y) <- NULL + identical(x, y) + }, + error = \(x) FALSE + ) +} + +.add_p_tbl_summary_default_test <- function(data, variable, by, group, adj.vars, summary_type) { + # for continuous data, default to non-parametric tests + if (is_empty(group) && is_empty(adj.vars) && summary_type %in% c("continuous", "continuous2") && length(unique(data[[by]])) == 2) { + test_func <- + get_theme_element("add_p.tbl_summary-attr:test.continuous_by2", default = "wilcox.test") + return(test_func) + } + if (is_empty(group) && is_empty(adj.vars) && summary_type %in% c("continuous", "continuous2")) { + test_func <- + get_theme_element("add_p.tbl_summary-attr:test.continuous", default = "kruskal.test") + return(test_func) + } + # now assign categorical default tests + if (is_empty(group) && is_empty(adj.vars) && summary_type %in% c("categorical", "dichotomous")) { + # calculate expected counts to select between chi-square and fisher + min_exp <- + tryCatch( + suppressWarnings( + table(data[[by]], data[[variable]]) |> + proportions() %>% + {expand.grid(rowSums(.), colSums(.))} |> #styler: off + dplyr::mutate( + exp = .data$Var1 * .data$Var2 * + sum(!is.na(data[[variable]]) & !is.na(data[[by]])) + ) %>% + dplyr::pull(exp) |> + min() + ), + error = \(e) Inf # if there is an error for whatever reason, return Inf + ) + # if expected counts are greater than 5, then chisq test + if (isTRUE(min_exp >= 5 || is.nan(min_exp))) { + test_func <- + get_theme_element("add_p.tbl_summary-attr:test.categorical", default = "chisq.test.no.correct") + return(test_func) + } + # otherwise fishers test + test_func <- + get_theme_element("add_p.tbl_summary-attr:test.categorical.low_count", default = "fisher.test") + return(test_func) + } + + # now setting default tests for grouped data + # if group variable supplied, fit a random effects model + if (!is_empty(group) && is_empty(adj.vars) && length(unique(data[[by]])) == 2) { + if (summary_type %in% c("continuous", "continuous2")) { + test_func <- + get_theme_element("add_p.tbl_summary-attr:test.continuous.group_by2", default = "lme4") + return(test_func) + } + if (summary_type %in% c("categorical", "dichotomous")) { + test_func <- + get_theme_element("add_p.tbl_summary-attr:test.categorical.group_by2", default = "lme4") + return(test_func) + } + } + + # now setting default tests for adjusted comparisons + if (is_empty(group) && !is_empty(adj.vars) && length(unique(data[[by]])) == 2) { + if (summary_type %in% c("continuous", "continuous2")) { + return("ancova") + } + } + + + return(NULL) +} + + +.add_difference_tbl_summary_default_test <- function(data, variable, by, group, adj.vars, summary_type) { + if (is_empty(group) && is_empty(adj.vars) && summary_type %in% c("continuous", "continuous2")) { + return("t.test") + } + if (is_empty(group) && summary_type %in% c("continuous", "continuous2")) { + return("ancova") + } + if (is_empty(group) && is_empty(adj.vars) && summary_type %in% "dichotomous") { + return("prop.test") + } + if (is_empty(group) && is_empty(adj.vars) && summary_type %in% "categorical") { + return("smd") + } + + if (!is_empty(group) && summary_type %in% c("continuous", "continuous2")) { + return("ancova_lme4") + } + + return(NULL) +} diff --git a/R/bold_italicize_labels_levels.R b/R/bold_italicize_labels_levels.R new file mode 100644 index 000000000..817a58edc --- /dev/null +++ b/R/bold_italicize_labels_levels.R @@ -0,0 +1,149 @@ +#' Bold or Italicize +#' +#' Bold or italicize labels or levels in gtsummary tables +#' +#' @name bold_italicize_labels_levels +#' @param x (`gtsummary`) +#' An object of class 'gtsummary' +#' @author Daniel D. Sjoberg +#' +#' @return Functions return the same class of gtsummary object supplied +#' @examples +#' # Example 1 ---------------------------------- +#' tbl_summary(trial, include = c("trt", "age", "response")) |> +#' bold_labels() |> +#' bold_levels() |> +#' italicize_labels() |> +#' italicize_levels() +NULL + + +#' @export +#' @rdname bold_italicize_labels_levels +bold_labels <- function(x) { + UseMethod("bold_labels") +} + +#' @export +#' @rdname bold_italicize_labels_levels +italicize_labels <- function(x) { + UseMethod("italicize_labels") +} + +#' @export +#' @rdname bold_italicize_labels_levels +bold_levels <- function(x) { + UseMethod("bold_levels") +} + + +#' @export +#' @rdname bold_italicize_labels_levels +italicize_levels <- function(x) { + UseMethod("italicize_levels") +} + +#' @export +#' @rdname bold_italicize_labels_levels +bold_labels.gtsummary <- function(x) { + updated_call_list <- c(x$call_list, list(bold_labels = match.call())) + # input checks --------------------------------------------------------------- + if (!"row_type" %in% x$table_styling$header$column) { + cli::cli_inform("{.code bold_labels()} cannot be used in this context.") + return(x) + } + + # bold labels ---------------------------------------------------------------- + x <- + modify_table_styling( + x, + columns = .first_unhidden_column(x), + rows = .data$row_type == "label", + text_format = "bold" + ) + + x$call_list <- updated_call_list + + x +} + +#' @export +#' @rdname bold_italicize_labels_levels +bold_levels.gtsummary <- function(x) { + updated_call_list <- c(x$call_list, list(bold_levels = match.call())) + # input checks --------------------------------------------------------------- + if (!"row_type" %in% x$table_styling$header$column) { + cli::cli_inform("{.code bold_levels()} cannot be used in this context.") + return(x) + } + + # bold levels ---------------------------------------------------------------- + x <- + modify_table_styling( + x, + columns = .first_unhidden_column(x), + rows = .data$row_type != "label", + text_format = "bold" + ) + + x$call_list <- updated_call_list + + x +} + + +#' @export +#' @rdname bold_italicize_labels_levels +italicize_labels.gtsummary <- function(x) { + updated_call_list <- c(x$call_list, list(italicize_labels = match.call())) + # input checks --------------------------------------------------------------- + if (!"row_type" %in% x$table_styling$header$column) { + cli::cli_inform("{.code italicize_labels()} cannot be used in this context.") + return(x) + } + + # italicize labels ----------------------------------------------------------- + x <- + modify_table_styling( + x, + columns = .first_unhidden_column(x), + rows = .data$row_type == "label", + text_format = "italic" + ) + + x$call_list <- updated_call_list + + x +} + +#' @export +#' @rdname bold_italicize_labels_levels +italicize_levels.gtsummary <- function(x) { + updated_call_list <- c(x$call_list, list(italicize_levels = match.call())) + # input checks --------------------------------------------------------------- + if (!"row_type" %in% x$table_styling$header$column) { + cli::cli_inform("{.code italicize_levels()} cannot be used in this context.") + return(x) + } + + # italicize levels ----------------------------------------------------------- + x <- + modify_table_styling( + x, + columns = .first_unhidden_column(x), + rows = .data$row_type != "label", + text_format = "italic" + ) + + x$call_list <- updated_call_list + + x +} + + +.first_unhidden_column <- function(x) { + x$table_styling$header |> + dplyr::filter(!.data$hide) |> + dplyr::pull("column") |> + dplyr::first() +} diff --git a/R/brdg_summary.R b/R/brdg_summary.R new file mode 100644 index 000000000..de90367d0 --- /dev/null +++ b/R/brdg_summary.R @@ -0,0 +1,609 @@ +#' Summary Table Bridges +#' +#' @description +#' Bridge function for converting `tbl_summary()` (and similar) cards to table bodies. +#' All bridge functions begin with prefix `brdg_*()`. +#' +#' This file also contains helper functions for constructing the bridge, +#' referred to as the piers (supports for a bridge) and begin with `pier_*()`. +#' +#' - `brdg_summary()`: The bridge function ingests an ARD data frame and returns +#' a gtsummary table that includes `.$table_body` and a basic `.$table_styling`. +#' The `.$table_styling$header` data frame includes the header statistics. +#' Based on context, this function adds a column to the ARD data frame named +#' `"gts_column"`. This column is used during the reshaping in the `pier_*()` +#' functions defining column names. +#' +#' - `pier_*()`: these functions accept a cards tibble and returns a tibble +#' that is a piece of the `.$table_body`. Typically these will be stacked +#' to construct the final table body data frame. The ARD object passed here +#' will have two primary parts: the calculated summary statistics and the +#' attributes ARD. The attributes ARD is used for labeling. The ARD data frame +#' passed to this function must include a `"gts_column"` column, which is +#' added in `brdg_summary()`. +#' +#' @param cards (`card`)\cr +#' An ARD object of class `"card"` typically created with `cards::ard_*()` functions. +#' @param variables (`character`)\cr +#' character list of variables +#' @param by (`string`)\cr +#' string indicating the stratifying column +#' @param type (named `list`)\cr +#' named list of summary types +#' @param statistic (named `list`)\cr +#' named list of summary statistic names +#' @inheritParams tbl_summary +#' +#' @return data frame +#' @name brdg_summary +#' +#' @examples +#' library(cards) +#' +#' # first build ARD data frame +#' cards <- +#' ard_stack( +#' mtcars, +#' ard_continuous(variables = c("mpg", "hp")), +#' ard_categorical(variables = "cyl"), +#' ard_dichotomous(variables = "am"), +#' .missing = TRUE, +#' .attributes = TRUE +#' ) |> +#' # this column is used by the `pier_*()` functions +#' dplyr::mutate(gts_column = ifelse(context == "attributes", NA, "stat_0")) +#' +#' brdg_summary( +#' cards = cards, +#' variables = c("cyl", "am", "mpg", "hp"), +#' type = +#' list( +#' cyl = "categorical", +#' am = "dichotomous", +#' mpg = "continuous", +#' hp = "continuous2" +#' ), +#' statistic = +#' list( +#' cyl = "{n} / {N}", +#' am = "{n} / {N}", +#' mpg = "{mean} ({sd})", +#' hp = c("{median} ({p25}, {p75})", "{mean} ({sd})") +#' ) +#' ) |> +#' as_tibble() +#' +#' pier_summary_dichotomous( +#' cards = cards, +#' variables = "am", +#' statistic = list(am = "{n} ({p})") +#' ) +#' +#' pier_summary_categorical( +#' cards = cards, +#' variables = "cyl", +#' statistic = list(cyl = "{n} ({p})") +#' ) +#' +#' pier_summary_continuous2( +#' cards = cards, +#' variables = "hp", +#' statistic = list(hp = c("{median}", "{mean}")) +#' ) +#' +#' pier_summary_continuous( +#' cards = cards, +#' variables = "mpg", +#' statistic = list(mpg = "{median}") +#' ) +NULL + +#' @rdname brdg_summary +#' @export +brdg_summary <- function(cards, + variables, + type, + statistic, + by = NULL, + missing = "no", + missing_stat = "{N_miss}", + missing_text = "Unknown") { + set_cli_abort_call() + + # add gts info to the cards table -------------------------------------------- + # adding the name of the column the stats will populate + if (is_empty(by)) { + cards$gts_column <- + ifelse( + !cards$context %in% "attributes", + # cards$context %in% c("continuous", "categorical", "dichotomous", "missing"), + "stat_0", + NA_character_ + ) + } else { + cards <- + cards %>% + { + dplyr::left_join( + ., + dplyr::filter( + ., + .data$variable %in% .env$variables, + !cards$context %in% "attributes", + # cards$context %in% c("continuous", "categorical", "dichotomous", "missing"), + ) |> + dplyr::select(cards::all_ard_groups(), "variable", "context") |> + dplyr::distinct() |> + dplyr::mutate( + .by = cards::all_ard_groups(), + gts_column = paste0("stat_", dplyr::cur_group_id()) + ), + by = names(dplyr::select(., cards::all_ard_groups(), "variable", "context")) + ) + } + } + + + # build the table body pieces with bridge functions and stack them ----------- + x <- list() + x$table_body <- + dplyr::left_join( + dplyr::tibble( + variable = variables, + var_type = type[.data$variable] |> unlist() |> unname() + ), + dplyr::bind_rows( + pier_summary_continuous( + cards = cards, + variables = .get_variables_by_type(type, type = "continuous"), + statistic = statistic + ), + pier_summary_continuous2( + cards = cards, + variables = .get_variables_by_type(type, type = "continuous2"), + statistic = statistic + ), + pier_summary_categorical( + cards = cards, + variables = .get_variables_by_type(type, type = "categorical"), + statistic = statistic + ), + pier_summary_dichotomous( + cards = cards, + variables = .get_variables_by_type(type, type = "dichotomous"), + statistic = statistic + ), + pier_summary_missing_row( + cards = cards, + variables = variables, + missing = missing, + missing_stat = missing_stat, + missing_text = missing_text + ) + ), + by = "variable" + ) + + # construct default table_styling -------------------------------------------- + x <- construct_initial_table_styling(x) + + # add info to x$table_styling$header for dynamic headers --------------------- + x <- .add_table_styling_stats(x, cards = cards, by = by) + + x |> + structure(class = "gtsummary") |> + modify_column_unhide(columns = all_stat_cols()) +} + +#' @rdname brdg_summary +#' @export +pier_summary_dichotomous <- function(cards, + variables, + statistic) { + set_cli_abort_call() + if (is_empty(variables)) { + return(dplyr::tibble()) + } + + pier_summary_continuous( + cards = cards, + variables = variables, + statistic = statistic + ) +} + +#' @rdname brdg_summary +#' @export +pier_summary_categorical <- function(cards, + variables, + statistic) { + set_cli_abort_call() + if (is_empty(variables)) { + return(dplyr::tibble()) + } + # subsetting cards object on categorical summaries ---------------------------- + cards_no_attr <- + cards |> + dplyr::filter(.data$variable %in% .env$variables, !.data$context %in% "attributes") |> + cards::apply_fmt_fn() + + + # construct formatted statistics --------------------------------------------- + df_glued <- + # construct stat columns with glue by grouping variables and primary summary variable + cards_no_attr |> + dplyr::group_by(across(c("gts_column", cards::all_ard_groups(), "variable"))) |> + dplyr::group_map( + function(df_variable_stats, df_groups_and_variable) { + lst_variable_stats <- + cards::get_ard_statistics( + df_variable_stats, + .data$variable_level %in% list(NULL), + .column = "stat_fmt" + ) + + str_statistic_pre_glue <- + statistic[[df_groups_and_variable$variable[1]]] + + dplyr::mutate( + .data = df_groups_and_variable, + df_stats = + dplyr::filter(df_variable_stats, !.data$variable_level %in% list(NULL)) |> + dplyr::group_by(.data$variable_level) |> + dplyr::group_map( + function(df_variable_level_stats, df_variable_levels) { + dplyr::mutate( + .data = df_variable_levels, + stat = + map( + str_statistic_pre_glue, + function(str_to_glue) { + stat <- + glue::glue( + str_to_glue, + .envir = + cards::get_ard_statistics(df_variable_level_stats, .column = "stat_fmt") |> + c(lst_variable_stats) + ) |> + as.character() + } + ), + label = unlist(.data$variable_level) |> as.character() + ) + } + ) |> + dplyr::bind_rows() |> + list() + ) + } + ) |> + dplyr::bind_rows() + + # reshape results for final table -------------------------------------------- + df_result_levels <- + df_glued |> + # merge in variable label + dplyr::left_join( + cards |> + dplyr::filter( + .data$variable %in% .env$variables, + .data$context %in% "attributes", + .data$stat_name %in% "label" + ) |> + dplyr::select("variable", var_label = "stat"), + by = "variable" + ) |> + dplyr::mutate( + .by = "variable", + row_type = "level", + var_label = unlist(.data$var_label), + .after = 0L + ) |> + dplyr::select(-cards::all_ard_groups()) |> + tidyr::unnest(cols = "df_stats") |> + tidyr::unnest(cols = "stat") |> + tidyr::pivot_wider( + id_cols = c("row_type", "var_label", "variable", "label"), + names_from = "gts_column", + values_from = "stat" + ) + + # add header rows to results ------------------------------------------------- + df_results <- + map( + variables, + ~ dplyr::bind_rows( + df_result_levels |> + dplyr::select("variable", "var_label", "row_type") |> + dplyr::filter(.data$variable %in% .x) |> + dplyr::filter(dplyr::row_number() %in% 1L) |> + dplyr::mutate( + label = .data$var_label, + row_type = "label" + ), + df_result_levels |> + dplyr::filter(.data$variable %in% .x) + ) + ) |> + dplyr::bind_rows() + + df_results +} + +#' @rdname brdg_summary +#' @export +pier_summary_continuous2 <- function(cards, + variables, + statistic) { + set_cli_abort_call() + if (is_empty(variables)) { + return(dplyr::tibble()) + } + # subsetting cards object on continuous2 summaries ---------------------------- + cards_no_attr <- + cards |> + dplyr::filter(.data$variable %in% .env$variables, !.data$context %in% "attributes") |> + cards::apply_fmt_fn() + + # construct formatted statistics --------------------------------------------- + df_glued <- + # construct stat columns with glue by grouping variables and primary summary variable + cards_no_attr |> + dplyr::group_by(across(c("gts_column", cards::all_ard_groups(), "variable"))) |> + dplyr::group_map( + function(.x, .y) { + dplyr::mutate( + .data = .y, + stat = + map( + statistic[[.y$variable[1]]], + function(str_to_glue) { + stat <- + glue::glue( + str_to_glue, + .envir = cards::get_ard_statistics(.x, .column = "stat_fmt") + ) |> + as.character() + } + ) |> + list(), + label = + map( + statistic[[.y$variable[1]]], + function(str_to_glue) { + label <- + glue::glue( + str_to_glue, + .envir = cards::get_ard_statistics(.x, .column = "stat_label") + ) |> + as.character() + } + ) |> + list() + ) + } + ) |> + dplyr::bind_rows() + + # reshape results for final table -------------------------------------------- + df_result_levels <- + df_glued |> + # merge in variable label + dplyr::left_join( + cards |> + dplyr::filter( + .data$variable %in% .env$variables, + .data$context %in% "attributes", + .data$stat_name %in% "label" + ) |> + dplyr::select("variable", var_label = "stat"), + by = "variable" + ) |> + dplyr::mutate( + .by = "variable", + row_type = "level", + var_label = unlist(.data$var_label), + .after = 0L + ) |> + dplyr::select(-cards::all_ard_groups()) |> + tidyr::unnest(cols = c("stat", "label")) |> + tidyr::unnest(cols = c("stat", "label")) |> + tidyr::pivot_wider( + id_cols = c("row_type", "var_label", "variable", "label"), + names_from = "gts_column", + values_from = "stat" + ) + + # add header rows to results ------------------------------------------------- + df_results <- + map( + variables, + ~ dplyr::bind_rows( + df_result_levels |> + dplyr::select("variable", "var_label", "row_type") |> + dplyr::filter(.data$variable %in% .x) |> + dplyr::filter(dplyr::row_number() %in% 1L) |> + dplyr::mutate( + label = .data$var_label, + row_type = "label" + ), + df_result_levels |> + dplyr::filter(.data$variable %in% .x) + ) + ) |> + dplyr::bind_rows() + + df_results +} + +#' @rdname brdg_summary +#' @export +pier_summary_continuous <- function(cards, + variables, + statistic) { + set_cli_abort_call() + if (is_empty(variables)) { + return(dplyr::tibble()) + } + # subsetting cards object on statistical summaries --------------------------- + cards_no_attr <- + cards |> + dplyr::filter(.data$variable %in% .env$variables, !.data$context %in% "attributes") |> + cards::apply_fmt_fn() + + # construct formatted statistics --------------------------------------------- + df_glued <- + # construct stat columns with glue by grouping variables and primary summary variable + cards_no_attr |> + dplyr::group_by(across(c("gts_column", cards::all_ard_groups(), "variable"))) |> + dplyr::group_map( + function(.x, .y) { + dplyr::mutate( + .data = .y, + stat = + glue::glue( + statistic[[.data$variable[1]]], + .envir = cards::get_ard_statistics(.x, .column = "stat_fmt") + ) |> + as.character() + ) + } + ) |> + dplyr::bind_rows() + + # reshape results for final table -------------------------------------------- + df_results <- + df_glued |> + # merge in variable label + dplyr::left_join( + cards |> + dplyr::filter( + .data$variable %in% .env$variables, + .data$context %in% "attributes", + .data$stat_name %in% "label" + ) |> + dplyr::select("variable", var_label = "stat"), + by = "variable" + ) |> + dplyr::mutate( + .by = "variable", + row_type = "label", + var_label = unlist(.data$var_label), + label = .data$var_label, + .after = 0L + ) |> + tidyr::pivot_wider( + id_cols = c("row_type", "var_label", "variable", "label"), + names_from = "gts_column", + values_from = "stat" + ) + + df_results +} + +#' @rdname brdg_summary +#' @export +pier_summary_missing_row <- function(cards, + variables, + missing = "no", + missing_stat = "{N_miss}", + missing_text = "Unknown") { + set_cli_abort_call() + + # return empty tibble if no missing row requested + if (is_empty(variables) || missing == "no") { + return(dplyr::tibble()) + } + + # if "ifany", replace the variables vector with those that have missing values + if (missing == "ifany") { + variables <- + cards |> + dplyr::filter(.data$stat_name == "N_miss", .data$variable %in% .env$variables) |> + dplyr::filter(.data$stat > 0L) |> + dplyr::pull("variable") |> + unique() + } + + # slightly modifying the `x` object for missing value calculations ----------- + # make all the summary stats the same for all vars + statistic <- rep_named(variables, list(missing_stat)) + + # reshape the missing stats + pier_summary_continuous( + cards = cards, + variables = variables, + statistic = statistic + ) |> + # update the row_type and label + dplyr::mutate( + row_type = "missing", + label = missing_text + ) +} + +.add_table_styling_stats <- function(x, cards, by) { + if (is_empty(by)) { + x$table_styling$header <- + x$table_styling$header |> + dplyr::mutate( + modify_stat_N = + cards |> + dplyr::filter(.data$stat_name %in% "N_obs") |> + dplyr::pull("stat") |> + unlist() |> + getElement(1), + modify_stat_n = .data$modify_stat_N, + modify_stat_p = 1, + modify_stat_level = "Overall" + ) + } else { + df_by_stats <- cards |> + dplyr::filter(.data$variable %in% .env$by & .data$stat_name %in% c("N", "n", "p")) + + # get a data frame with the by variable stats + df_by_stats_wide <- + df_by_stats |> + dplyr::filter(.data$stat_name %in% c("n", "p")) |> + dplyr::mutate( + .by = "variable_level", + column = paste0("stat_", dplyr::cur_group_id()) + ) %>% + dplyr::bind_rows( + dplyr::select(., "variable_level", "column", stat = "variable_level") |> + dplyr::mutate(stat_name = "level") |> + dplyr::distinct() + ) |> + tidyr::pivot_wider( + id_cols = "column", + names_from = "stat_name", + values_from = "stat" + ) |> + dplyr::mutate( + dplyr::across(-"column", unlist), + dplyr::across("level", as.character) + ) |> + dplyr::rename_with( + function(x) paste0("modify_stat_", x), + .cols = -"column" + ) + + # add the stats here to the header data frame + x$table_styling$header <- + x$table_styling$header |> + dplyr::mutate( + modify_stat_N = + df_by_stats |> + dplyr::filter(.data$stat_name %in% "N") |> + dplyr::pull("stat") |> + unlist() |> + getElement(1L) + ) |> + dplyr::left_join( + df_by_stats_wide, + by = "column" + ) + } + + x +} diff --git a/R/card_summary.R b/R/card_summary.R new file mode 100644 index 000000000..9e500b39c --- /dev/null +++ b/R/card_summary.R @@ -0,0 +1,252 @@ +#' ARD summary table +#' +#' The `card_summary()` function tables descriptive statistics for +#' continuous, categorical, and dichotomous variables. +#' The functions accepts an ARD object. +#' +#' @param cards (`card`)\cr +#' An ARD object of class `"card"` typically created with `cards::ard_*()` functions. +#' @param statistic ([`formula-list-selector`][syntax])\cr +#' Used to specify the summary statistics for each variable. +#' Each of the statistics must be present in `card` as no new statistics are calculated +#' in this function. +#' The default is +#' `list(all_continuous() ~ "{median} ({p25}, {p75})", all_categorical() ~ "{n} ({p}%)")`. +#' @param missing,missing_text,missing_stat +#' Arguments dictating how and if missing values are presented: +#' - `missing`: must be one of `c("ifany", "no", "always")` +#' - `missing_text`: string indicating text shown on missing row. Default is `"Unknown"` +#' - `missing_stat`: statistic to show on missing row. Default is `"{N_miss}"`. +#' Possible values are `N_miss`, `N_obs`, `N_nonmiss`, `p_miss`, `p_nonmiss` +#' @param type ([`formula-list-selector`][syntax])\cr +#' Specifies the summary type. Accepted value are +#' `c("continuous", "continuous2", "categorical", "dichotomous")`. +#' Continuous summaries may be assigned `c("continuous", "continuous2")`, while +#' categorical and dichotomous cannot be modified. +#' @param include ([`tidy-select`][dplyr::dplyr_tidy_select])\cr +#' Variables to include in the summary table. Default is `everything()` +#' +#' @return a gtsummary table of class `"card_summary"` +#' @export +#' +#' @examples +#' library(cards) +#' +#' ard_stack( +#' data = ADSL, +#' ard_categorical(variables = "AGEGR1"), +#' ard_continuous(variables = "AGE"), +#' .attributes = TRUE, +#' .missing = TRUE +#' ) |> +#' card_summary() +#' +#' ard_stack( +#' data = ADSL, +#' .by = ARM, +#' ard_categorical(variables = "AGEGR1"), +#' ard_continuous(variables = "AGE"), +#' .attributes = TRUE, +#' .missing = TRUE +#' ) |> +#' card_summary() +card_summary <- function(cards, + statistic = list( + all_continuous() ~ "{median} ({p25}, {p75})", + all_categorical() ~ "{n} ({p}%)" + ), + type = NULL, + missing = c("ifany", "no", "always"), + missing_text = "Unknown", + missing_stat = "{N_miss}", + include = everything()) { + set_cli_abort_call() + # data argument checks ------------------------------------------------------- + check_not_missing(cards) + check_class(cards, "card") + missing <- arg_match(missing) + + # check structure of ARD input ----------------------------------------------- + # TODO: What other checks should we add? + if (!is_empty(names(dplyr::select(cards, cards::all_ard_groups())) |> setdiff(c("group1", "group1_level")))) { + cli::cli_abort( + c("The {.arg cards} object may only contain a single stratifying variable.", + i = "But contains {.val {names(dplyr::select(cards, cards::all_ard_groups())) |> setdiff(c('group1', 'group1_level'))}}." + ), + call = get_cli_abort_call() + ) + } + + # define a data frame based on the context of `card` ------------------------- + data <- cards |> + dplyr::select(cards::all_ard_variables()) |> + dplyr::bind_rows( + dplyr::select(cards, cards::all_ard_groups()) |> + dplyr::rename(variable = any_of("group1"), variable_level = any_of("group1_level")) + ) |> + dplyr::filter(!is.na(.data$variable)) |> + dplyr::slice(.by = "variable", 1L) |> + dplyr::mutate(variable_level = map(.data$variable_level, ~ .x %||% NA_real_)) |> + tidyr::pivot_wider( + names_from = "variable", + values_from = "variable_level" + ) |> + dplyr::mutate(across(everything(), unlist)) + + if ("group1" %in% names(cards)) { + by <- stats::na.omit(cards$group1)[1] |> unclass() + } else { + by <- character(0L) + } + cards::process_selectors(data, include = {{ include }}) + include <- setdiff(include, by) # remove by variable from list vars included + + # check that each included variable has 'missing' and 'attributes' ARDs (this can probably be relaxed later) + missing_or_attributes_ard <- + imap( + include, + ~ dplyr::filter(cards, .data$variable %in% .env$.x, .data$context %in% c("missing", "attributes")) |> + dplyr::select("variable", "context") |> + dplyr::distinct() |> + nrow() %>% + {!identical(., 2L)} # styler: off + ) |> + set_names(include) |> + unlist() + if (any(missing_or_attributes_ard)) { + cli::cli_abort( + c("{.val {names(missing_or_attributes_ard)[missing_or_attributes_ard]}} + {?does/do} not have associated {.field missing} or {.field attributes} ARD results.", + i = "Use {.fun cards::ard_missing}, {.fun cards::ard_attributes}, or + {.code cards::ard_stack(.missing=TRUE, .attributes=TRUE)} to calculate needed results." + ), + call = get_cli_abort_call() + ) + } + + # temporary type so we can evaluate `statistic`, then we'll update it + default_types <- dplyr::select(cards, "variable", "context") |> + dplyr::distinct() |> + dplyr::filter( + .data$context %in% c("continuous", "categorical", "dichotomous"), + .data$variable %in% .env$include + ) |> + deframe() |> + as.list() + + # process arguments ---------------------------------------------------------- + cards::process_formula_selectors( + data = select_prep(.list2tb(default_types, "var_type"), data[include]), + type = type + ) + # fill in unspecified variables + cards::fill_formula_selectors( + select_prep(.list2tb(default_types, "var_type"), data[include]), + type = default_types, + ) + cards::check_list_elements( + x = type, + predicate = \(x) is.character(x) && length(x) == 1L && x %in% c("categorical", "dichotomous", "continuous", "continuous2"), + error_msg = "Elements of the {.arg type} argumnet must be one of {.val {c('categorical', 'dichotomous', 'continuous', 'continuous2')}}." + ) + # if the user passed `type` then check that the values are compatible with ARD summary types + if (!missing(type)) { + walk( + include, + function(variable) { + if (default_types[[variable]] %in% "continuous" && + !type[[variable]] %in% c("continuous", "continuous2")) { + cli::cli_abort( + "Summary type for variable {.val {variable}} must be one of + {.val {c('continuous', 'continuous2')}}, not {.val {type[[variable]]}}.", + call = get_cli_abort_call() + ) + } else if (default_types[[variable]] %in% c("categorical", "dichotomous") && + !identical(type[[variable]], default_types[[variable]])) { + cli::cli_abort( + "Summary type for variable {.val {variable}} must be + {.val {default_types[[variable]]}}, not {.val {type[[variable]]}}.", + call = get_cli_abort_call() + ) + } + } + ) + } + + cards::process_formula_selectors( + data = select_prep(.list2tb(type, "var_type"), data[include]), + statistic = statistic + ) + # fill in unspecified variables + cards::fill_formula_selectors( + select_prep(.list2tb(type, "var_type"), data[include]), + statistic = eval(formals(gtsummary::card_summary)[["statistic"]]), + ) + cards::check_list_elements( + x = statistic, + predicate = \(x) is.character(x), + error_msg = "The {.arg statistic} argument values must be class {.cls character} vector." + ) + .check_stats_available(cards, statistic) + + walk( + include, + \(variable) { + if (type[[variable]] %in% c("categorical", "dichotomous", "continuous") && + !is_string(statistic[[variable]])) { + cli::cli_abort( + "Variable {.val {variable}} is type {.arg {type[[variable]]}} and + {.arg statistic} argument value must be a string of length one.", + call = get_cli_abort_call() + ) + } + } + ) + # save inputs + card_summary_inputs <- as.list(environment())[names(formals(card_summary))] + call <- match.call() + + # construct initial card_summary object -------------------------------------- + x <- + brdg_summary( + cards = cards, + by = by, + variables = include, + statistic = statistic, + type = type, + missing = missing, + missing_stat = missing_stat, + missing_text = missing_text + ) |> + append( + list( + cards = list(card_summary = cards), + inputs = card_summary_inputs, + call_list = list(card_summary = call) + ) + ) |> + structure(class = c("card_summary", "gtsummary")) + + # adding styling ------------------------------------------------------------- + x <- x |> + # add header to label column and add default indentation + modify_table_styling( + columns = "label", + label = "**Characteristic**", + rows = .data$row_type %in% c("level", "missing"), + indent = 4L + ) |> + # adding the statistic footnote + modify_table_styling( + columns = all_stat_cols(), + footnote = + .construct_summary_footnote(x$cards[["card_summary"]], include, statistic, type) + ) |> + # updating the headers for the stats columns + modify_header( + all_stat_cols() ~ ifelse(is_empty(by), "**N = {N}**", "**{level}** \nN = {n}") + ) + + # return card_summary table -------------------------------------------------- + x +} diff --git a/R/default_stat_labels.R b/R/default_stat_labels.R new file mode 100644 index 000000000..2d7efcbda --- /dev/null +++ b/R/default_stat_labels.R @@ -0,0 +1,41 @@ +#' Default Statistics Labels +#' +#' @return named list +#' @keywords internal +default_stat_labels <- function() { + list( + # standard summary stat labels + mean = "Mean", + sd = "SD", + var = "Variance", + median = "Median", + min = "Min", + max = "Max", + var = "Variance", + sum = "Sum", + n = "n", + N = "N", + p = "%", + N_obs = "No. obs.", + N_miss = "N Missing", + N_nonmiss = "N Non-missing", + p_miss = "% Missing", + p_nonmiss = "% Non-missing", + + # survey statistics + N_unweighted = "N (unweighted)", + n_unweighted = "n (unweighted)", + N_obs_unweighted = "Total N (unweighted)", + N_miss_unweighted = "N Missing (unweighted)", + N_nonmiss_unweighted = "N not Missing (unweighted)", + p_unweighted = "% (unweighted)", + p_miss_unweighted = "% Missing (unweighted)", + p_nonmiss_unweighted = "% not Missing (unweighted)", + mean.std.error = "SE", + p.std.error = "SE(%)", + deff = "Design effect" + ) |> + # adding the percentile labels + c(paste0(0:100, "% Centile") |> as.list() |> set_names(paste0("p", 0:100))) |> + utils::modifyList(val = list(p25 = "Q1", p50 = "Q2", p75 = "Q3")) +} diff --git a/R/deprecated.R b/R/deprecated.R index 68c42d998..9ddd89760 100644 --- a/R/deprecated.R +++ b/R/deprecated.R @@ -10,19 +10,24 @@ NULL # tentative deprecation schedule, `Sys.Date() - months(18)` # "warn" for 18 months +# 2.0.0 TARGET 2024-07-01 TODO: Update this # 1.7.2 2023-07-13 # 1.7.1 2023-04-27 # 1.7.0 2023-01-13 + + +# "stop" for 18 months, then delete from pkg, `Sys.Date() - months(36)` # 1.6.3 2022-12-06 # 1.6.2 2022-09-30 # 1.6.1 2022-06-22 # 1.6.0 2022-04-25 - -# "stop" for 18 months, then delete from pkg, `Sys.Date() - months(36)` # 1.5.2 2022-01-29 # 1.5.1 2022-01-20 # 1.5.0 2021-10-16 # 1.4.2 2021-07-13 + + +# TODO: Delete these version and dates # 1.4.1 2021-05-19 # 1.4.0 2021-04-13 # 1.3.7 2021-02-26 @@ -31,133 +36,9 @@ NULL # 1.3.4 2020-08-27 # 1.3.3 2020-08-11 - -# v1.3.3 (2020-08-11) ---------------------------------------------------------- -#' @rdname deprecated -#' @export -as_flextable <- function(...) { - lifecycle::deprecate_stop( - "1.3.3", "gtsummary::as_flextable()", "as_flex_table()", - details = paste( - "The `as_flextable()` function graduated", - "from 'Experimental' status in v1.3.3. The function's name was changed", - "to avoid a name conflict with `flextable::as_flextable()`.", - "If you are trying to use the function", - "from {flextable}, for the time being, use the double colon notation", - "when both {gtsummary} and {flextable}", - "are loaded, e.g. `flextable::as_flextable(...)`." - ) %>% - stringr::str_wrap() - ) -} - -# v1.3.6 ----------------------------------------------------------------------- -#' @rdname deprecated -#' @export -all_numeric <- function() { - lifecycle::deprecate_stop( - "1.3.6", "gtsummary::all_numeric()", - details = paste0( - "The {tidyselect} and {dplyr} packages have implemented functions to ", - "select variables by class and type, and the {gtsummary} version is ", - "now deprecated.\n\n", - "Use `where(is.numeric)` instead." - ) - ) -} - - -#' @rdname deprecated -#' @export -all_character <- function() { - lifecycle::deprecate_stop( - "1.3.6", "gtsummary::all_character()", - details = paste0( - "The {tidyselect} and {dplyr} packages have implemented functions to ", - "select variables by class and type, and the {gtsummary} version is ", - "now deprecated.\n\n", - "Use `where(is.character)` instead." - ) - ) -} - -#' @rdname deprecated -#' @export -all_integer <- function() { - lifecycle::deprecate_stop( - "1.3.6", "gtsummary::all_integer()", - details = paste0( - "The {tidyselect} and {dplyr} packages have implemented functions to ", - "select variables by class and type, and the {gtsummary} version is ", - "now deprecated.\n\n", - "Use `where(is.integer)` instead." - ) - ) -} - -#' @rdname deprecated -#' @export -all_double <- function() { - lifecycle::deprecate_stop( - "1.3.6", "gtsummary::all_double()", - details = paste0( - "The {tidyselect} and {dplyr} packages have implemented functions to ", - "select variables by class and type, and the {gtsummary} version is ", - "now deprecated.\n\n", - "Use `where(is.double)` instead." - ) - ) -} - -#' @rdname deprecated -#' @export -all_logical <- function() { - lifecycle::deprecate_stop( - "1.3.6", "gtsummary::all_logical()", - details = paste0( - "The {tidyselect} and {dplyr} packages have implemented functions to ", - "select variables by class and type, and the {gtsummary} version is ", - "now deprecated.\n\n", - "Use `where(is.logical)` instead." - ) - ) -} - -#' @rdname deprecated -#' @export -all_factor <- function() { - lifecycle::deprecate_stop( - "1.3.6", "gtsummary::all_factor()", - details = paste0( - "The {tidyselect} and {dplyr} packages have implemented functions to ", - "select variables by class and type, and the {gtsummary} version is ", - "now deprecated.\n\n", - "Use `where(is.factor)` instead." - ) - ) -} - -# this is a copy of the tidyselect where function. it can be deleted after the -# all_factor, all_character, etc. functions are fully deprecated -where <- function(fn) { - predicate <- rlang::as_function(fn) - - function(x, ...) { - out <- predicate(x, ...) - - if (!rlang::is_bool(out)) { - abort("`where()` must be used with functions that return `TRUE` or `FALSE`.") - } - - out - } -} - - # v1.6.1 ---------------------------------------------------------- #' @rdname deprecated #' @export modify_cols_merge <- function(...) { - lifecycle::deprecate_warn("1.6.1", "gtsummary::modify_cols_merge()", "modify_column_merge()") - modify_column_merge(...) + lifecycle::deprecate_stop("1.6.1", "gtsummary::modify_cols_merge()", "modify_column_merge()") } diff --git a/R/gtsummary-package.R b/R/gtsummary-package.R index 34ac0d04e..963ebe979 100644 --- a/R/gtsummary-package.R +++ b/R/gtsummary-package.R @@ -1,41 +1,11 @@ -#' @importFrom dplyr mutate select n group_by ungroup filter pull case_when -#' if_else full_join left_join distinct bind_rows count coalesce arrange rename -#' rename_at bind_cols mutate_all mutate_at slice desc rowwise inner_join -#' row_number -#' @importFrom purrr map imap map2 pmap map_chr map_dfr map_lgl map_dbl map_if -#' imap_dfr imap_lgl map2_chr pmap_lgl pmap_chr pmap_dbl compact keep discard -#' every some pluck flatten negate partial cross_df reduce chuck -#' @importFrom tidyr nest unnest complete spread -#' @importFrom tibble tibble tribble as_tibble enframe deframe -#' @importFrom rlang .data .env %||% set_names sym syms parse_expr expr exprs -#' call2 := inform abort is_function is_string enexpr inject is_empty -#' is_function is_list is_named is_character check_dots_empty -#' quo_is_null enquo eval_tidy quo_text -#' @importFrom glue glue as_glue glue_collapse -#' @importFrom stringr fixed word str_extract_all str_remove_all str_starts -#' str_split str_detect str_remove str_replace_all str_wrap str_sub str_locate -#' str_sub -#' @importFrom broom.helpers .formula_list_to_named_list .select_to_varnames -#' .generic_selector -#' @importFrom cli cli_alert_info cli_alert_danger cli_code cli_ul -#' @importFrom gt md html #' @keywords internal +#' @import rlang +#' @importFrom dplyr across +#' @importFrom glue glue "_PACKAGE" -# allowing for the use of the dot when piping -utils::globalVariables(".") - ## usethis namespace: start -#' @importFrom lifecycle deprecate_soft ## usethis namespace: end NULL -release_bullets <- function() { - c( - "Check the output from `devtools::check()` and look for warnings or messages", - "Review deprecation schedule", - "Run the code styler", - "Updated the gt help file images", - "Check build size is less than 5MB" - ) -} +utils::globalVariables(c(".")) diff --git a/R/import-standalone-check_pkg_installed.R b/R/import-standalone-check_pkg_installed.R new file mode 100644 index 000000000..7f1cd7a89 --- /dev/null +++ b/R/import-standalone-check_pkg_installed.R @@ -0,0 +1,203 @@ +# Standalone file: do not edit by hand +# Source: +# ---------------------------------------------------------------------- +# +# --- +# repo: ddsjoberg/standalone +# file: standalone-check_pkg_installed.R +# last-updated: 2024-04-15 +# license: https://unlicense.org +# dependencies: standalone-cli_call_env.R +# imports: [rlang, dplyr, tidyr] +# --- +# +# This file provides functions to check package installation. +# +# ## Changelog +# nocov start +# styler: off + +#' Check Package Installation +#' +#' @description +#' - `check_pkg_installed()`: checks whether a package is installed and +#' returns an error if not available, or interactively asks user to install +#' missing dependency. If a package search is provided, +#' the function will check whether a minimum version of a package is required and installed. +#' +#' - `is_pkg_installed()`: checks whether a package is installed and +#' returns `TRUE` or `FALSE` depending on availability. If a package search is provided, +#' the function will check whether a minimum version of a package is required and installed. +#' +#' - `get_pkg_dependencies()` returns a tibble with all +#' dependencies of a specific package. +#' +#' - `get_min_version_required()` will return, if any, the minimum version +#' of `pkg` required by `reference_pkg`. +#' +#' @param pkg (`character`)\cr +#' vector of package names to check. +#' @param call (`environment`)\cr +#' frame for error messaging. Default is [get_cli_abort_call()]. +#' @param reference_pkg (`string`)\cr +#' name of the package the function will search for a minimum required version from. +#' @param lib.loc (`path`)\cr +#' location of `R` library trees to search through, see [utils::packageDescription()]. +#' +#' @return `is_pkg_installed()` and `check_pkg_installed()` returns a logical or error, +#' `get_min_version_required()` returns a data frame with the minimum version required, +#' `get_pkg_dependencies()` returns a tibble. +#' +#' @examples +#' check_pkg_installed("dplyr") +#' +#' is_pkg_installed("dplyr") +#' +#' get_pkg_dependencies() +#' +#' get_min_version_required("dplyr") +#' +#' @name check_pkg_installed +#' @noRd +NULL + +#' @inheritParams check_pkg_installed +#' @keywords internal +#' @noRd +check_pkg_installed <- function(pkg, + reference_pkg = "cards", + call = get_cli_abort_call()) { + # check inputs --------------------------------------------------------------- + check_not_missing(pkg) + check_class(pkg, cls = "character") + check_string(reference_pkg, allow_empty = TRUE) + + # get min version data ------------------------------------------------------- + df_pkg_min_version <- + get_min_version_required(pkg = pkg, reference_pkg = reference_pkg, call = call) + + # prompt user to install package --------------------------------------------- + rlang::check_installed( + pkg = df_pkg_min_version$pkg, + version = df_pkg_min_version$version, + compare = df_pkg_min_version$compare, + call = call + ) |> + # this can be removed after this issue is resolved https://github.com/r-lib/rlang/issues/1694 + suppressWarnings() +} + +#' @inheritParams check_pkg_installed +#' @keywords internal +#' @noRd +is_pkg_installed <- function(pkg, + reference_pkg = "cards", + call = get_cli_abort_call()) { + # check inputs --------------------------------------------------------------- + check_not_missing(pkg) + check_class(pkg, cls = "character") + check_string(reference_pkg, allow_empty = TRUE) + + # get min version data ------------------------------------------------------- + df_pkg_min_version <- + get_min_version_required(pkg = pkg, reference_pkg = reference_pkg, call = call) + + # check installation TRUE/FALSE ---------------------------------------------- + rlang::is_installed( + pkg = df_pkg_min_version$pkg, + version = df_pkg_min_version$version, + compare = df_pkg_min_version$compare + ) |> + # this can be removed after this issue is resolved https://github.com/r-lib/rlang/issues/1694 + suppressWarnings() +} + +#' @inheritParams check_pkg_installed +#' @keywords internal +#' @noRd +get_pkg_dependencies <- function(reference_pkg = "cards", lib.loc = NULL, call = get_cli_abort_call()) { + check_string(reference_pkg, allow_empty = TRUE, call = call) + + if (rlang::is_empty(reference_pkg)) { + return(.empty_pkg_deps_df()) + } + + description <- utils::packageDescription(reference_pkg, lib.loc = lib.loc) |> suppressWarnings() + if (identical(description, NA)) { + return(.empty_pkg_deps_df()) + } + description |> + unclass() |> + dplyr::as_tibble() |> + dplyr::select( + dplyr::any_of(c( + "Package", "Version", "Imports", "Depends", + "Suggests", "Enhances", "LinkingTo" + )) + ) |> + dplyr::rename( + reference_pkg = "Package", + reference_pkg_version = "Version" + ) |> + tidyr::pivot_longer( + -dplyr::all_of(c("reference_pkg", "reference_pkg_version")), + values_to = "pkg", + names_to = "dependency_type", + ) |> + tidyr::separate_rows("pkg", sep = ",") |> + dplyr::mutate(pkg = str_squish(.data$pkg)) |> + dplyr::filter(!is.na(.data$pkg)) |> + tidyr::separate( + .data$pkg, + into = c("pkg", "version"), + sep = " ", extra = "merge", fill = "right" + ) |> + dplyr::mutate( + compare = .data$version |> str_extract(pattern = "[>=<]+"), + version = .data$version |> str_remove_all(pattern = "[\\(\\) >=<]") + ) +} + +.empty_pkg_deps_df <- function() { + dplyr::tibble( + reference_pkg = character(0L), reference_pkg_version = character(0L), + dependency_type = character(0L), pkg = character(0L), + version = character(0L), compare = character(0L) + ) +} + +#' @inheritParams check_pkg_installed +#' @keywords internal +#' @noRd +get_min_version_required <- function(pkg, reference_pkg = "cards", + lib.loc = NULL, call = get_cli_abort_call()) { + check_not_missing(pkg, call = call) + check_class(pkg, cls = "character", call = call) + check_string(reference_pkg, allow_empty = TRUE, call = call) + + # if no package reference, return a df with just the pkg names + if (rlang::is_empty(reference_pkg)) { + return( + .empty_pkg_deps_df() |> + dplyr::full_join( + dplyr::tibble(pkg = pkg), + by = "pkg" + ) + ) + } + + # get the package_ref deps and subset on requested pkgs, also supplement df with pkgs + # that may not be proper deps of the reference package (these pkgs don't have min versions) + res <- + get_pkg_dependencies(reference_pkg, lib.loc = lib.loc) |> + dplyr::filter(.data$pkg %in% .env$pkg) |> + dplyr::full_join( + dplyr::tibble(pkg = pkg), + by = "pkg" + ) + + res +} + +# nocov end +# styler: on diff --git a/R/import-standalone-checks.R b/R/import-standalone-checks.R new file mode 100644 index 000000000..88c255e3a --- /dev/null +++ b/R/import-standalone-checks.R @@ -0,0 +1,448 @@ +# Standalone file: do not edit by hand +# Source: +# ---------------------------------------------------------------------- +# +# --- +# repo: ddsjoberg/standalone +# file: standalone-checks.R +# last-updated: 2024-05-04 +# license: https://unlicense.org +# dependencies: standalone-cli_call_env.R +# imports: [rlang, cli] +# --- +# +# This file provides a minimal functions to check argument values and types +# passed by users to functions in packages. +# +# ## Changelog +# nocov start +# styler: off + +#' Check Class +#' +#' @param cls (`character`)\cr +#' character vector or string indicating accepted classes. +#' Passed to `inherits(what=cls)` +#' @param x `(object)`\cr +#' object to check +#' @param message (`character`)\cr +#' string passed to `cli::cli_abort(message)` +#' @param allow_empty (`logical(1)`)\cr +#' Logical indicating whether an empty value will pass the test. +#' Default is `FALSE` +#' @param arg_name (`string`)\cr +#' string indicating the label/symbol of the object being checked. +#' Default is `rlang::caller_arg(x)` +#' @inheritParams cli::cli_abort +#' @inheritParams rlang::abort +#' @keywords internal +#' @noRd +check_class <- function(x, + cls, + allow_empty = FALSE, + message = + ifelse( + allow_empty, + "The {.arg {arg_name}} argument must be class + {.cls {cls}} or empty, not {.obj_type_friendly {x}}.", + "The {.arg {arg_name}} argument must be class + {.cls {cls}}, not {.obj_type_friendly {x}}." + ), + arg_name = rlang::caller_arg(x), + class = "check_class", + call = get_cli_abort_call()) { + # if empty, skip test + if (isTRUE(allow_empty) && rlang::is_empty(x)) { + return(invisible(x)) + } + + if (!inherits(x, cls)) { + cli::cli_abort(message, class = c(class, "standalone-checks"), call = call) + } + invisible(x) +} + +#' Check Class Data Frame +#' +#' @inheritParams check_class +#' @keywords internal +#' @noRd +check_data_frame <- function(x, + allow_empty = FALSE, + message = + ifelse( + allow_empty, + "The {.arg {arg_name}} argument must be class + {.cls {cls}} or empty, not {.obj_type_friendly {x}}.", + "The {.arg {arg_name}} argument must be class + {.cls {cls}}, not {.obj_type_friendly {x}}." + ), + arg_name = rlang::caller_arg(x), + class = "check_data_frame", + call = get_cli_abort_call()) { + check_class( + x = x, cls = "data.frame", allow_empty = allow_empty, + message = message, arg_name = arg_name, class = class, call = call + ) +} + +#' Check Class Logical +#' +#' @inheritParams check_class +#' @keywords internal +#' @noRd +check_logical <- function(x, + allow_empty = FALSE, + message = + ifelse( + allow_empty, + "The {.arg {arg_name}} argument must be class + {.cls {cls}} or empty, not {.obj_type_friendly {x}}.", + "The {.arg {arg_name}} argument must be class + {.cls {cls}}, not {.obj_type_friendly {x}}." + ), + arg_name = rlang::caller_arg(x), + class = "check_logical", + call = get_cli_abort_call()) { + check_class( + x = x, cls = "logical", allow_empty = allow_empty, + message = message, arg_name = arg_name, class = class, call = call + ) +} + +#' Check Class Logical and Scalar +#' +#' @inheritParams check_class +#' @keywords internal +#' @noRd +check_scalar_logical <- function(x, + allow_empty = FALSE, + message = + ifelse( + allow_empty, + "The {.arg {arg_name}} argument must be a scalar with class + {.cls {cls}} or empty, not {.obj_type_friendly {x}}.", + "The {.arg {arg_name}} argument must be a scalar with class + {.cls {cls}}, not {.obj_type_friendly {x}}." + ), + arg_name = rlang::caller_arg(x), + class = "check_scalar_logical", + call = get_cli_abort_call()) { + check_logical( + x = x, allow_empty = allow_empty, + message = message, arg_name = arg_name, + class = class, call = call + ) + + check_scalar( + x = x, allow_empty = allow_empty, + message = message, arg_name = arg_name, + call = call + ) +} + +#' Check String +#' +#' @inheritParams check_class +#' @keywords internal +#' @noRd +check_string <- function(x, + allow_empty = FALSE, + message = + ifelse( + allow_empty, + "The {.arg {arg_name}} argument must be a string or empty, + not {.obj_type_friendly {x}}.", + "The {.arg {arg_name}} argument must be a string, + not {.obj_type_friendly {x}}." + ), + arg_name = rlang::caller_arg(x), + class = "check_string", + call = get_cli_abort_call()) { + check_class( + x = x, cls = "character", allow_empty = allow_empty, + message = message, arg_name = arg_name, + class = class, call = call + ) + + check_scalar( + x = x, allow_empty = allow_empty, + message = message, arg_name = arg_name, + class = class, call = call + ) +} + +#' Check Argument not Missing +#' +#' @inheritParams check_class +#' @keywords internal +#' @noRd +check_not_missing <- function(x, + message = "The {.arg {arg_name}} argument cannot be missing.", + arg_name = rlang::caller_arg(x), + class = "check_not_missing", + call = get_cli_abort_call()) { + if (missing(x)) { + cli::cli_abort(message, class = c(class, "standalone-checks"), call = call) + } + + # can't return 'x' because it may be an unevaluable obj, eg a bare tidyselect + invisible() +} + +#' Check Length +#' +#' @param length (`integer(1)`)\cr +#' integer specifying the required length +#' @inheritParams check_class +#' @keywords internal +#' @noRd +check_length <- function(x, length, + message = + ifelse( + allow_empty, + "The {.arg {arg_name}} argument must be length {.val {length}} or empty.", + "The {.arg {arg_name}} argument must be length {.val {length}}." + ), + allow_empty = FALSE, + arg_name = rlang::caller_arg(x), + class = "check_length", + call = get_cli_abort_call()) { + # if empty, skip test + if (isTRUE(allow_empty) && rlang::is_empty(x)) { + return(invisible(x)) + } + + # check length + if (length(x) != length) { + cli::cli_abort(message, class = c(class, "standalone-checks"), call = call) + } + + invisible(x) +} + +#' Check is Scalar +#' +#' @inheritParams check_class +#' @keywords internal +#' @noRd +check_scalar <- function(x, + allow_empty = FALSE, + message = + ifelse( + allow_empty, + "The {.arg {arg_name}} argument must be length {.val {length}} or empty.", + "The {.arg {arg_name}} argument must be length {.val {length}}." + ), + arg_name = rlang::caller_arg(x), + class = "check_scalar", + call = get_cli_abort_call()) { + check_length( + x = x, length = 1L, message = message, + allow_empty = allow_empty, arg_name = arg_name, + class = class, call = call + ) +} + +#' Check Number of Levels +#' +#' @param n_levels Number of required levels (after NA are removed). +#' @inheritParams check_class +#' @keywords internal +#' @noRd +check_n_levels <- function(x, + n_levels, + message = + "The {.arg {arg_name}} argument must have {.val {length}} levels.", + arg_name = rlang::caller_arg(x), + class = "check_n_levels", + call = get_cli_abort_call()) { + check_length( + x = stats::na.omit(x) |> unique(), + length = n_levels, message = message, + allow_empty = FALSE, arg_name = arg_name, + class = class, call = call + ) +} + +#' Check Range +#' +#' @param x numeric scalar to check +#' @param range numeric vector of length two +#' @param include_bounds logical of length two indicating whether to allow +#' the lower and upper bounds +#' @inheritParams check_class +#' +#' @return invisible +#' @keywords internal +#' @noRd +check_range <- function(x, + range, + include_bounds = c(FALSE, FALSE), + message = + "The {.arg {arg_name}} argument must be in the interval + {.code {ifelse(include_bounds[1], '[', '(')}{range[1]}, + {range[2]}{ifelse(include_bounds[2], ']', ')')}}.", + allow_empty = FALSE, + arg_name = rlang::caller_arg(x), + class = "check_range", + call = get_cli_abort_call()) { + # if empty, skip test + if (isTRUE(allow_empty) && rlang::is_empty(x)) { + return(invisible(x)) + } + + print_error <- FALSE + # check input is numeric + if (!is.numeric(x)) { + print_error <- TRUE + } + + # check the lower bound of range + if (isFALSE(print_error) && isTRUE(include_bounds[1]) && any(x < range[1])) { + print_error <- TRUE + } + if (isFALSE(print_error) && isFALSE(include_bounds[1]) && any(x <= range[1])) { + print_error <- TRUE + } + + # check upper bound of range + if (isFALSE(print_error) && isTRUE(include_bounds[2]) && any(x > range[2])) { + print_error <- TRUE + } + if (isFALSE(print_error) && isFALSE(include_bounds[2]) && any(x >= range[2])) { + print_error <- TRUE + } + + # print error + if (print_error) { + cli::cli_abort(message, class = c(class, "standalone-checks"), call = call) + } + + invisible(x) +} + +#' Check Scalar Range +#' +#' @param x numeric scalar to check +#' @param range numeric vector of length two +#' @param include_bounds logical of length two indicating whether to allow +#' the lower and upper bounds +#' @inheritParams check_class +#' +#' @return invisible +#' @keywords internal +#' @noRd +check_scalar_range <- function(x, + range, + include_bounds = c(FALSE, FALSE), + allow_empty = FALSE, + message = + "The {.arg {arg_name}} argument must be in the interval + {.code {ifelse(include_bounds[1], '[', '(')}{range[1]}, + {range[2]}{ifelse(include_bounds[2], ']', ')')}} + and length {.val {1}}.", + arg_name = rlang::caller_arg(x), + class = "check_scalar_range", + call = get_cli_abort_call()) { + check_scalar(x, message = message, arg_name = arg_name, + allow_empty = allow_empty, class = class, call = call) + + check_range(x = x, range = range, include_bounds = include_bounds, + message = message, allow_empty = allow_empty, + arg_name = arg_name, class = class, call = call) +} + +#' Check Binary +#' +#' Checks if a column in a data frame is binary, +#' that is, if the column is class `` or +#' `` and coded as `c(0, 1)` +#' +#' @param x a vector +#' @inheritParams check_class +#' +#' @return invisible +#' @keywords internal +#' @noRd +check_binary <- function(x, + allow_empty = FALSE, + message = + ifelse( + allow_empty, + "Expecting {.arg {arg_name}} to be either {.cls logical}, + {.cls {c('numeric', 'integer')}} coded as {.val {c(0, 1)}}, or empty.", + "Expecting {.arg {arg_name}} to be either {.cls logical} + or {.cls {c('numeric', 'integer')}} coded as {.val {c(0, 1)}}." + ), + arg_name = rlang::caller_arg(x), + class = "check_binary", + call = get_cli_abort_call()) { + # if empty, skip test + if (isTRUE(allow_empty) && rlang::is_empty(x)) { + return(invisible(x)) + } + + # first check x is either logical or numeric + check_class(x, cls = c("logical", "numeric", "integer"), + arg_name = arg_name, message = message, class = class, call = call) + + # if "numeric" or "integer", it must be coded as 0, 1 + if (!is.logical(x) && !(rlang::is_integerish(x) && rlang::is_empty(setdiff(x, c(0, 1, NA))))) { + cli::cli_abort(message, class = c(class, "standalone-checks"), call = call) + } + + invisible(x) +} + + +#' Check Formula-List Selector +#' +#' Checks the structure of the formula-list selector used throughout the +#' cards, cardx, and gtsummary packages. +#' +#' @param x formula-list selecting object +#' @inheritParams check_class +#' +#' @return invisible +#' @keywords internal +#' @noRd +check_formula_list_selector <- function(x, + allow_empty = FALSE, + message = + c( + ifelse( + allow_empty, + "The {.arg {arg_name}} argument must be a named list, list of formulas, a single formula, or empty.", + "The {.arg {arg_name}} argument must be a named list, list of formulas, or a single formula." + ), + "i" = "Review {.help [?syntax](cards::syntax)} for examples and details." + ), + arg_name = rlang::caller_arg(x), + class = "check_formula_list_selector", + call = get_cli_abort_call()) { + # if empty, skip test + if (isTRUE(allow_empty) && rlang::is_empty(x)) { + return(invisible(x)) + } + + # first check the general structure; must be a list or formula + check_class( + x = x, cls = c("list", "formula"), allow_empty = allow_empty, + message = message, arg_name = arg_name, class = class, call = call + ) + + # if it's a list, then check each element is either named or a formula + if (inherits(x, "list")) { + for (i in seq_along(x)) { + if (!rlang::is_named(x[i]) && !inherits(x[[i]], "formula")) { + cli::cli_abort(message, class = c(class, "standalone-checks"), call = call) + } + } + } + + invisible(x) +} + +# nocov end +# styler: on diff --git a/R/import-standalone-cli_call_env.R b/R/import-standalone-cli_call_env.R new file mode 100644 index 000000000..88ccd6934 --- /dev/null +++ b/R/import-standalone-cli_call_env.R @@ -0,0 +1,53 @@ +# Standalone file: do not edit by hand +# Source: +# ---------------------------------------------------------------------- +# +# --- +# repo: ddsjoberg/standalone +# file: standalone-cli_call_env.R +# last-updated: 2024-04-10 +# license: https://unlicense.org +# imports: [rlang, cli] +# --- +# +# This file provides functions to set and access the call environment to use in cli::cli_abort() in check functions. +# +# ## Changelog +# nocov start +# styler: off + +#' Set Call Environment for [cli::cli_abort()] +#' +#' Set a call environment to be used as the `call` parameter in [cli::cli_abort()] for package checks. This function +#' is used to ensure that the correct user-facing function is reported for errors generated by internal checks that +#' use [cli::cli_abort()]. +#' +#' @param env (`enviroment`)\cr +#' call environment used as the `call` parameter in [cli::cli_abort()] for package checks +#' +#' @seealso [get_cli_abort_call()] +#' +#' @keywords internal +#' @noRd +set_cli_abort_call <- function(env = rlang::caller_env()) { + if (getOption("cli_abort_call") |> is.null()) { + options(cli_abort_call = env) + set_call <- as.call(list(function() options(cli_abort_call = NULL))) + do.call(on.exit, list(expr = set_call, after = FALSE), envir = env) + } + invisible() +} + +#' Get Call Environment for [cli::cli_abort()] +#' +#' @inheritParams set_cli_abort_call +#' @seealso [set_cli_abort_call()] +#' +#' @keywords internal +#' @noRd +get_cli_abort_call <- function() { + getOption("cli_abort_call", default = parent.frame()) +} + +# nocov end +# styler: on diff --git a/R/import-standalone-forcats.R b/R/import-standalone-forcats.R new file mode 100644 index 000000000..22a9ef349 --- /dev/null +++ b/R/import-standalone-forcats.R @@ -0,0 +1,49 @@ +# Standalone file: do not edit by hand +# Source: +# ---------------------------------------------------------------------- +# +# --- +# file: standalone-forcats.R +# last-updated: 2024-05-04 +# license: https://unlicense.org +# imports: +# --- +# +# This file provides a minimal shim to provide a forcats-like API on top of +# base R functions. They are not drop-in replacements but allow a similar style +# of programming. +# +# ## Changelog +# +# nocov start +# styler: off + +fct_infreq <- function(f, ordered = NA) { + # reorder by frequency + factor( + f, + levels = table(f) |> sort(decreasing = TRUE) |> names(), + ordered = ifelse(is.na(ordered), is.ordered(f), ordered) + ) +} + +fct_inorder <- function(f, ordered = NA) { + factor( + f, + levels = stats::na.omit(unique(f)) |> union(levels(f)), + ordered = ifelse(is.na(ordered), is.ordered(f), ordered) + ) +} + +fct_rev <- function(f) { + if (!inherits(f, "factor")) f <- factor(f) + + factor( + f, + levels = rev(levels(f)), + ordered = is.ordered(f) + ) +} + +# nocov end +# styler: on diff --git a/R/import-standalone-purrr.R b/R/import-standalone-purrr.R new file mode 100644 index 000000000..623142a0e --- /dev/null +++ b/R/import-standalone-purrr.R @@ -0,0 +1,240 @@ +# Standalone file: do not edit by hand +# Source: +# ---------------------------------------------------------------------- +# +# --- +# repo: r-lib/rlang +# file: standalone-purrr.R +# last-updated: 2023-02-23 +# license: https://unlicense.org +# imports: rlang +# --- +# +# This file provides a minimal shim to provide a purrr-like API on top of +# base R functions. They are not drop-in replacements but allow a similar style +# of programming. +# +# ## Changelog +# +# 2023-02-23: +# * Added `list_c()` +# +# 2022-06-07: +# * `transpose()` is now more consistent with purrr when inner names +# are not congruent (#1346). +# +# 2021-12-15: +# * `transpose()` now supports empty lists. +# +# 2021-05-21: +# * Fixed "object `x` not found" error in `imap()` (@mgirlich) +# +# 2020-04-14: +# * Removed `pluck*()` functions +# * Removed `*_cpl()` functions +# * Used `as_function()` to allow use of `~` +# * Used `.` prefix for helpers +# +# nocov start + +map <- function(.x, .f, ...) { + .f <- as_function(.f, env = global_env()) + lapply(.x, .f, ...) +} +walk <- function(.x, .f, ...) { + map(.x, .f, ...) + invisible(.x) +} + +map_lgl <- function(.x, .f, ...) { + .rlang_purrr_map_mold(.x, .f, logical(1), ...) +} +map_int <- function(.x, .f, ...) { + .rlang_purrr_map_mold(.x, .f, integer(1), ...) +} +map_dbl <- function(.x, .f, ...) { + .rlang_purrr_map_mold(.x, .f, double(1), ...) +} +map_chr <- function(.x, .f, ...) { + .rlang_purrr_map_mold(.x, .f, character(1), ...) +} +.rlang_purrr_map_mold <- function(.x, .f, .mold, ...) { + .f <- as_function(.f, env = global_env()) + out <- vapply(.x, .f, .mold, ..., USE.NAMES = FALSE) + names(out) <- names(.x) + out +} + +map2 <- function(.x, .y, .f, ...) { + .f <- as_function(.f, env = global_env()) + out <- mapply(.f, .x, .y, MoreArgs = list(...), SIMPLIFY = FALSE) + if (length(out) == length(.x)) { + set_names(out, names(.x)) + } else { + set_names(out, NULL) + } +} +map2_lgl <- function(.x, .y, .f, ...) { + as.vector(map2(.x, .y, .f, ...), "logical") +} +map2_int <- function(.x, .y, .f, ...) { + as.vector(map2(.x, .y, .f, ...), "integer") +} +map2_dbl <- function(.x, .y, .f, ...) { + as.vector(map2(.x, .y, .f, ...), "double") +} +map2_chr <- function(.x, .y, .f, ...) { + as.vector(map2(.x, .y, .f, ...), "character") +} +imap <- function(.x, .f, ...) { + map2(.x, names(.x) %||% seq_along(.x), .f, ...) +} + +pmap <- function(.l, .f, ...) { + .f <- as.function(.f) + args <- .rlang_purrr_args_recycle(.l) + do.call("mapply", c( + FUN = list(quote(.f)), + args, MoreArgs = quote(list(...)), + SIMPLIFY = FALSE, USE.NAMES = FALSE + )) +} +.rlang_purrr_args_recycle <- function(args) { + lengths <- map_int(args, length) + n <- max(lengths) + + stopifnot(all(lengths == 1L | lengths == n)) + to_recycle <- lengths == 1L + args[to_recycle] <- map(args[to_recycle], function(x) rep.int(x, n)) + + args +} + +keep <- function(.x, .f, ...) { + .x[.rlang_purrr_probe(.x, .f, ...)] +} +discard <- function(.x, .p, ...) { + sel <- .rlang_purrr_probe(.x, .p, ...) + .x[is.na(sel) | !sel] +} +map_if <- function(.x, .p, .f, ...) { + matches <- .rlang_purrr_probe(.x, .p) + .x[matches] <- map(.x[matches], .f, ...) + .x +} +.rlang_purrr_probe <- function(.x, .p, ...) { + if (is_logical(.p)) { + stopifnot(length(.p) == length(.x)) + .p + } else { + .p <- as_function(.p, env = global_env()) + map_lgl(.x, .p, ...) + } +} + +compact <- function(.x) { + Filter(length, .x) +} + +transpose <- function(.l) { + if (!length(.l)) { + return(.l) + } + + inner_names <- names(.l[[1]]) + + if (is.null(inner_names)) { + fields <- seq_along(.l[[1]]) + } else { + fields <- set_names(inner_names) + .l <- map(.l, function(x) { + if (is.null(names(x))) { + set_names(x, inner_names) + } else { + x + } + }) + } + + # This way missing fields are subsetted as `NULL` instead of causing + # an error + .l <- map(.l, as.list) + + map(fields, function(i) { + map(.l, .subset2, i) + }) +} + +every <- function(.x, .p, ...) { + .p <- as_function(.p, env = global_env()) + + for (i in seq_along(.x)) { + if (!rlang::is_true(.p(.x[[i]], ...))) return(FALSE) + } + TRUE +} +some <- function(.x, .p, ...) { + .p <- as_function(.p, env = global_env()) + + for (i in seq_along(.x)) { + if (rlang::is_true(.p(.x[[i]], ...))) return(TRUE) + } + FALSE +} +negate <- function(.p) { + .p <- as_function(.p, env = global_env()) + function(...) !.p(...) +} + +reduce <- function(.x, .f, ..., .init) { + f <- function(x, y) .f(x, y, ...) + Reduce(f, .x, init = .init) +} +reduce_right <- function(.x, .f, ..., .init) { + f <- function(x, y) .f(y, x, ...) + Reduce(f, .x, init = .init, right = TRUE) +} +accumulate <- function(.x, .f, ..., .init) { + f <- function(x, y) .f(x, y, ...) + Reduce(f, .x, init = .init, accumulate = TRUE) +} +accumulate_right <- function(.x, .f, ..., .init) { + f <- function(x, y) .f(y, x, ...) + Reduce(f, .x, init = .init, right = TRUE, accumulate = TRUE) +} + +detect <- function(.x, .f, ..., .right = FALSE, .p = is_true) { + .p <- as_function(.p, env = global_env()) + .f <- as_function(.f, env = global_env()) + + for (i in .rlang_purrr_index(.x, .right)) { + if (.p(.f(.x[[i]], ...))) { + return(.x[[i]]) + } + } + NULL +} +detect_index <- function(.x, .f, ..., .right = FALSE, .p = is_true) { + .p <- as_function(.p, env = global_env()) + .f <- as_function(.f, env = global_env()) + + for (i in .rlang_purrr_index(.x, .right)) { + if (.p(.f(.x[[i]], ...))) { + return(i) + } + } + 0L +} +.rlang_purrr_index <- function(x, right = FALSE) { + idx <- seq_along(x) + if (right) { + idx <- rev(idx) + } + idx +} + +list_c <- function(x) { + inject(c(!!!x)) +} + +# nocov end diff --git a/R/import-standalone-stringr.R b/R/import-standalone-stringr.R new file mode 100644 index 000000000..263bde5bc --- /dev/null +++ b/R/import-standalone-stringr.R @@ -0,0 +1,48 @@ +# Standalone file: do not edit by hand +# Source: +# ---------------------------------------------------------------------- +# +# --- +# file: standalone-stringr.R +# last-updated: 2024-01-24 +# license: https://unlicense.org +# imports: rlang +# --- +# +# This file provides a minimal shim to provide a stringr-like API on top of +# base R functions. They are not drop-in replacements but allow a similar style +# of programming. +# +# ## Changelog +# +# nocov start +# styler: off + +str_trim <- function(string, side = c("both", "left", "right")) { + side <- rlang::arg_match(side) + trimws(x = string, which = side, whitespace = "[ \t\r\n]") +} + +str_squish <- function(string) { + gsub(x = string, pattern = "\\s+", replacement = " ") |> + str_trim(side = "both") +} + +str_remove_all <- function(string, pattern) { + gsub(x = string, pattern = pattern, replacement = "") +} + +str_extract <- function(string, pattern) { + ifelse( + str_detect(string, pattern), + regmatches(x = string, m = regexpr(pattern = pattern, text = string)), + NA_character_ + ) +} + +str_detect <- function(string, pattern) { + grepl(pattern = pattern, x = string) +} + +# nocov end +# styler: on diff --git a/R/import-standalone-tibble.R b/R/import-standalone-tibble.R new file mode 100644 index 000000000..f4c7b001f --- /dev/null +++ b/R/import-standalone-tibble.R @@ -0,0 +1,48 @@ +# Standalone file: do not edit by hand +# Source: +# ---------------------------------------------------------------------- +# +# --- +# file: standalone-tibble.R +# last-updated: 2024-05-07 +# license: https://unlicense.org +# imports: [dplyr] +# --- +# +# This file provides a minimal shim to provide a tibble-like API on top of +# base R functions. They are not drop-in replacements but allow a similar style +# of programming. +# +# ## Changelog +# +# nocov start +# styler: off + +deframe <- function(x) { + if (ncol(x) == 1L) return(x[[1]]) + x[[2]] |> stats::setNames(x[[1]]) +} + +enframe <- function(x, name = "name", value = "value") { + if (!is.null(names(x))) { + lst <- list(names(x), unname(x)) |> stats::setNames(c(name, value)) + } + else { + lst <- list(seq_along(x), unname(x)) |> stats::setNames(c(name, value)) + } + dplyr::tibble(!!!lst) +} + +remove_rownames <- function(.data) { + rownames(.data) <- NULL + .data +} + +rownames_to_column <- function(.data, var = "rowname") { + .data[[var]] <- rownames(.data) + + dplyr::relocate(.data, dplyr::all_of(var), .before = 1L) +} + +# nocov end +# styler: on diff --git a/R/modify.R b/R/modify.R index a4789f987..a082249f8 100644 --- a/R/modify.R +++ b/R/modify.R @@ -1,40 +1,45 @@ -#' Modify column headers, footnotes, spanning headers, and table captions +#' Modify column headers, footnotes, and spanning headers #' -#' These functions assist with updating or adding column headers -#' (`modify_header()`), footnotes (`modify_footnote()`), spanning -#' headers (`modify_spanning_header()`), and table captions -#' (`modify_caption()`). Use `show_header_names()` to learn -#' the column names. +#' @description +#' These functions assist with modifying the aesthetics/style of a table. #' -#' @name modify -#' @param x a gtsummary object -#' @param update,... use these arguments to assign updates to headers, -#' spanning headers, and footnotes. See examples below. -#' - `update` expects a list of assignments, with the variable name or selector -#' on the LHS of the formula, and the updated string on the RHS. Also accepts -#' a named list. -#' - `...` pass individual updates outside of a list, e.g, -#' `modify_header(p.value = "**P**", all_stat_cols() ~ "**{level}**")` +#' - `modify_header()` update column headers +#' - `modify_footnote()` update/add table footnotes +#' - `modify_spanning_header()` update/add spanning headers +#' +#' The functions often require users to know the underlying column names. +#' Run `show_header_names()` to print the column names to the console. +#' +#' @param x (`gtsummary`)\cr +#' A gtsummary object +#' @param ... [`dynamic-dots`][rlang::dyn-dots]\cr +#' Used to assign updates to headers, +#' spanning headers, and footnotes. +#' +#' Use `modify_*(colname='new header/footnote')` to update a single column. Using a +#' formula will invoke tidyselect, e.g. `modify_*(all_stat_cols() ~ "**{level}**")`. +#' The dynamic dots allow syntax like `modify_header(x, !!!list(label = "Variable"))`. +#' See examples below. +#' +#' TODO: Add link when the function below is added. +#' Use the `show_header_names()` to see the column names that can be modified. +#' @param abbreviation (scalar `logical`)\cr +#' Logical indicating if an abbreviation is being updated. +#' @param text_interpret (`string`)\cr +#' String indicates whether text will be interpreted with +#' [`gt::md()`] or [`gt::html()`]. Must be `"md"` (default) or `"html"`. +# TODO: add this back when show function is added +# @param include_example (scalar `logical`)\cr +# Logical whether to include print of `modify_header()` example +#' @param update,quiet `r lifecycle::badge("deprecated")` #' -#' Use the `show_header_names()` to see the column names that can be modified. -#' @param abbreviation Logical indicating if an abbreviation is being updated. -#' @param stat_by DEPRECATED, use `update = all_stat_cols() ~ "