Skip to content

Commit

Permalink
Merge pull request #239 from Crunch-io/develop
Browse files Browse the repository at this point in the history
WIP: Finalizing tracking reports
  • Loading branch information
1beb committed Jan 11, 2021
2 parents 996a7b5 + fa320d7 commit 41e978b
Show file tree
Hide file tree
Showing 9 changed files with 198 additions and 36 deletions.
2 changes: 0 additions & 2 deletions R/asToplineCategoricalArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,12 @@
#' @param labels A character vector of labels that will be displayed in the
#' resulting crunchtabs output. Should match the number of results objects
as.ToplineCategoricalArray <- function(questions, question_alias = NULL, labels) {

if (length(questions) != length(labels))
stop("Number of labels provided does not match number of result sets")

# Use the first result item as a skeleton
obj <- questions[[1]]

is_mr <- questions[[1]]$type == "multiple_response"
is_catarray <- questions[[1]]$type == "categorical_array"

if(is_catarray) {
Expand Down
89 changes: 76 additions & 13 deletions R/trackingReports.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,43 +5,105 @@
#' @param dataset_list A list of two or more crunch datasets. Datasets should be
#' provided in time order. From oldest to youngest. (i.e, wave 1, wave 2,
#' ..., wave n)
#' @param vars A character vector of question aliases to be included in the report
#' @param vars A character vector of question aliases to be included in the
#' report this may include aliases that are available in at least one of the
#' datasets specified in dataset_list
#' @param weight NULL to accept each dataset's current weight or a single alias
#' that is available in all datasets as a string.
#' that is available in all datasets as a string. Multiple weights is not
#' recommended in a tracking report.
#' @param labels The labels for each wave. Should be of a length that
#' matches the number of datasets.
tracking_report <- function(dataset_list, vars, labels = NULL, weight = NULL) {
# topline tabbooks
tabs <- tracking_report_tabs(dataset_list, vars, weight)

if (is.null(labels))
labels <- paste0("Wave ", seq_len(length(dataset_list)))

# Use the first result item as a skeleton
rebuilt_results <- tabs[[1]]
# In previous iterations we used the first item of tabs as a skeleton
# However, what if there is an alias that is not included in the first
# variable? Instead we build the rebuilt_results object piece by piece
# using the first available result for each alias to create a skeleton

rebuilt_results <- list()
class(rebuilt_results) <- c("Toplines", "CrunchTabs")
rebuilt_results$results <- lapply(vars, function(x) NULL)
rebuilt_results$metadata <- tabs[[1]]$metadata
names(rebuilt_results$results) <- vars
rebuilt_results$banner <- NULL

# Loop through each element of tabs, suck out the first result available
# per alias and use that result as part of the skeleton. If there is more
# than one result but less than n results, we need to denote that for future
# use.
#
# For example, if someone has a survey where "q1" was asked in waves 1 and 3
# but not 2 - we need a good way to identify this.

for (v in vars) {
var_results <- lapply(tabs, function(x) return(x$results[[v]]))
results_available <-which(!unlist(lapply(var_results, is.null)))
first_var_result <- which(!unlist(lapply(var_results, is.null)))[1]
rebuilt_results$results[[v]] <- var_results[[first_var_result]]
rebuilt_results$results[[v]]$available_at <- results_available


# For each alias, we set an attribute that identifies it's availability
# across all the datasets: "all", "partial", and "single"
# - "all" means it is available in every dataset
# - "partial" means it is available in only some datasets
# - "single" means it is available in exactly one dataset

# Because we use subsetting at the list level, "all" and "partial"
# would follow a typical path that labeling was adjusted appropriately
# for presentation in the resulting pdf "single" should act as a simple
# passthrough where no additional formatting or manipulation takes place
# on the result.

# The single case
if(length(results_available) == 1) {
rebuilt_results$results[[v]]$availability <- "single"
} else {
rebuilt_results$results[[v]]$availability <- "general"
}

}

# Now that we have an attribute that identifies availability we can use it as
# a trigger for logic that allows us to customize the result of each
# condition.
#
# We wil loop over each variable and either combine those elements that are
# setup for tracking, or passthrough those that are singles. As singles
# represent the simplest case, we will deal with them first.

for (v in vars) {
message("Preparing: ",v)
if (rebuilt_results$results[[v]]$availability == "single") {
next
}

available_at <- rebuilt_results$results[[v]]$available_at

message("Preparing: ",v) # TODO: Delete me after feature dev
result_list <- lapply(tabs, function(x) x$results[[v]])
if(rebuilt_results$results[[v]]$type == "categorical_array") {
rebuilt_results$results <- c(
catArrayToCategoricals(
result_list,
result_list[available_at],
question_alias=v,
labels=labels
labels=labels[available_at]
),
rebuilt_results$results
)
rebuilt_results$results[[v]] <- NULL

# Fix the class!
# We must fake the class of the object
class(rebuilt_results$results) <- c("ToplineResults", "CrosstabsResults")
} else {
rebuilt_results$results[[v]] <- as.ToplineCategoricalArray(
result_list,
result_list[available_at],
question_alias = v,
labels = labels
)
labels = labels[available_at]
)
}

}
Expand All @@ -59,7 +121,8 @@ tracking_report_tabs <- function(datasets, vars, weight = NULL) {
if(is.null(weight)) {
weight = weight(x)
}
crosstabs(x, vars, weight, include_numeric = TRUE)
adj_vars = vars[vars %in% names(x)]
crosstabs(x, adj_vars, weight, include_numeric = TRUE)
}
)
}
7 changes: 7 additions & 0 deletions R/writeCodeBookLatex.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ writeCodeBookLatex <- function(
preamble = NULL, suppress_zero_counts = FALSE, appendix = TRUE, logo = NULL,
position = NULL, path = NULL, filename = NULL, logging = FALSE,
...) {

if(!is.null(crunch::weight(ds)))
stop(paste(
"Codebooks are designed to work with whole numbers. Your dataset is",
"weighted and the resulting codebook will either not run or have",
"breaking display issues."
))

options("crunchtabs.codebook.suppress.zeros" = suppress_zero_counts)

Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,9 @@ While recontact reports are designed for questions asked in the same dataset, we
)

writeLatex(ct, pdf = TRUE, theme = theme)

![Tracking Report Example - Flipped grids](example-016-tracking-report.png "Tracking reports")


![Tracking Report Example - Flipped grids](vignettes/example-016-tracking-report.png)

### Create a Cross Tabulation

Expand Down
27 changes: 22 additions & 5 deletions dev-misc/tracking_reports_setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,25 @@ weight(ds3) <- ds3$weight1
ds1 <- loadDataset("Example dataset W1")
ds2 <- loadDataset("Example dataset W2")
ds3 <- loadDataset("Example dataset W3")


tema <- themeNew(default_theme = themeDefaultLatex(), latex_flip_grids = TRUE, one_per_sheet = FALSE)
ct <- tracking_report(list(ds1, ds2, ds3), vars = c("allpets", "q1", "petloc"))
writeLatex(ct, pdf = TRUE,theme = tema)
ds1$only_wave1 <- factor(sample(letters[1:5], 20, replace = T))
ds2$only_wave2 <- factor(sample(letters[1:5], 20, replace = T))
ds3$only_wave3 <- factor(sample(letters[1:5], 20, replace = T))
ds1$avail_wave13 <- factor(sample(letters[1:5], 20, replace = T))
ds3$avail_wave13 <- factor(sample(letters[1:5], 20, replace = T))
description(ds2$only_wave2) <- "This question is only available in wave 2"
description(ds1$avail_wave13) <- "This question is only available in waves 1 and 3"
description(ds1$avail_wave13) <- "This question is only available in waves 1 and 3"

thema <- themeNew(default_theme = themeDefaultLatex(), latex_flip_grids = FALSE, one_per_sheet = FALSE)
ct <- tracking_report(list(ds1, ds2, ds3), vars = c("allpets", "q1", "only_wave2", "avail_wave13"))
writeLatex(ct, pdf = TRUE,theme = thema, title = "Data from 3 Example Datasets")


# Clean up

datasets() %>%
as.data.frame() %>%
filter(grepl("Example", name)) %>%
pull(id) %>%
lapply(function(x)
with_consent(deleteDataset(sprintf("https://app.crunch.io/datasets/%s",x))))
38 changes: 25 additions & 13 deletions tests/testthat/test-banner.R
Original file line number Diff line number Diff line change
Expand Up @@ -151,16 +151,28 @@ test_that("Single banner with one variable, recodes - categories rename, else",
expect_identical(banner_data[["Results"]][["age5"]][["categories"]], c("Under 25", "Over 54"))
})

# TODO: Figure these out, maybe never.
# with_test_authentication({
# ds <- loadDataset("https://app.crunch.io/api/datasets/868e8b3e01834c45b73e56e80160d3c3/")
# test_that("Error handling - banner", {
# expect_warning(banner(ds, list(c(), "A"="art3")),
# "No variables found in 'Banner1' in `vars`. 'Banner1' will be ignored.")
#
# expect_error(banner(ds, list(Results = c("profile_gender")), recodes = list(profile_gender = list("Male2"="Man"))),
# "Responses in `recodes` must be included in variable responses. This is not true for 'Male2' in 'profile_gender'.")
# expect_error(banner(ds, list(Results = c("profile_gender")), recodes = list(profile_gender = list("Male"="Man", "Female"="Man"))),
# "Combining categories is not currently supported. Please check 'profile_gender' recodes.")
# })
# })
context("getBannerInfo")

test_that("Returns default banner", {
expect_equal(getBannerInfo(NULL), default_banner)
})

context("removeInserts")

test_that("Adjustments for subtotals", {
var <- list()
theme <- list()
theme$format_subtotals <- NULL
theme$format_headers <- NULL
var$inserts_obj <- list()
var$inserts_obj$test <- "Fake Object of class Subtotal"
class(var$inserts_obj$test) <- "Subtotal"
var$inserts_obj$other <- "Fake Object of class Headers"
class(var$inserts_obj$other) <- "Headers"

expect_equal(
removeInserts(var, theme),
list(inserts_obj = structure(list(), .Names = character(0)),
inserts = structure(list(), .Names = character(0)))
)
})
9 changes: 9 additions & 0 deletions tests/testthat/test-write-latex.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ with_temp_dir({
writeLatex(cs, theme = theme, pdf = TRUE)
theme <- themeNew(default_theme = theme, format_weighted_n=list(latex_add_parenthesis = TRUE))
writeLatex(cs, theme = theme, pdf = TRUE)
expect_true(file.remove("Example Dataset with Nets.pdf"))
expect_true(file.remove("Example Dataset with Nets.tex"))
})

test_that("Write Latex toplines", {
Expand All @@ -109,6 +111,8 @@ with_temp_dir({
ts$results[[1]]$description <- bad_description
writeLatex(ts, pdf = TRUE, file = "topline2")
expect_true(file.exists("topline2.pdf"))
expect_true(file.remove("Example Dataset with Nets.pdf"))
expect_true(file.remove("Example Dataset with Nets.tex"))
})
})

Expand Down Expand Up @@ -179,4 +183,9 @@ test_that("Adds nonTabBookSummary as expected", {
any(grepl("clearpage$", res))
)

})

test_that("Clean up", {
expect_true(file.remove("Example Dataset with Nets.pdf"))
expect_true(file.remove("Example Dataset with Nets.tex"))
})
55 changes: 55 additions & 0 deletions tests/testthat/test-writeCodeBookLatex.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ context("writeCodeBookLatex")
test_that("End to end writeCodeBookLatex", {
ds <- readRDS(test_path("fixtures/example_dataset.rds"))

mockery::stub(
writeCodeBookLatex,
"crunch::weight", NULL
)

mockery::stub(
writeCodeBookLatex,
"codeBookItemBody",
Expand All @@ -19,7 +24,9 @@ test_that("End to end writeCodeBookLatex", {
sample_desc = "US Voting Adults",
logo = "yougov",
pdf = TRUE)

)

tex <- readLines("Example-dataset.tex")
expect_equal(res, NULL)
expect_equal(length(tex), 149)
Expand Down Expand Up @@ -54,12 +61,19 @@ test_that("End to end writeCodeBookLatex", {
test_that("Dataset name as title if title not specified", {
ds <- readRDS(test_path("fixtures/example_dataset.rds"))

mockery::stub(
writeCodeBookLatex,
"crunch::weight", NULL
)

mockery::stub(
writeCodeBookLatex,
"codeBookItemBody",
readRDS(test_path("fixtures/codeBookItem_allpets.rds"))
)



mockery::stub(
writeCodeBookLatex,
"crunch::name",
Expand Down Expand Up @@ -87,6 +101,11 @@ test_that("Dataset name as title if title not specified", {
test_that("Dataset name as title if title not specified", {
ds <- readRDS(test_path("fixtures/example_dataset.rds"))

mockery::stub(
writeCodeBookLatex,
"crunch::weight", NULL
)

mockery::stub(
writeCodeBookLatex,
"codeBookItemBody",
Expand Down Expand Up @@ -131,6 +150,11 @@ test_that("Appendices are positioned as expected", {
# dput %>%
# saveRDS("tests/testthat/fixtures/codeBookItem_inputregstate")

mockery::stub(
writeCodeBookLatex,
"crunch::weight", NULL
)

mockery::stub(
writeCodeBookLatex,
"codeBookItemBody",
Expand Down Expand Up @@ -174,6 +198,10 @@ test_that("Position functions as expected", {
# ds <- loadDataset("Example dataset")
# codeBookItemBody(ds$allpets) %>% dput() %>%
# saveRDS("tests/testthat/fixtures/codeBookItem_allpets.rds")
mockery::stub(
writeCodeBookLatex,
"crunch::weight", NULL
)

mockery::stub(
writeCodeBookLatex,
Expand Down Expand Up @@ -242,3 +270,30 @@ test_that("default yg logo returns normal path", {
p <- default_yg_logo()
expect_equal(p, system.file("YouGov.png", package = "crunchtabs"))
})

test_that("Expect a stop if dataset is weighted", {
ds <- readRDS(test_path("fixtures/example_dataset.rds"))

mockery::stub(
writeCodeBookLatex,
"crunch::weight", "weight_alias"
)

mockery::stub(
writeCodeBookLatex,
"codeBookItemBody",
readRDS(test_path("fixtures/codeBookItem_allpets.rds"))
)

mockery::stub(writeCodeBookLatex, "file.open", NULL)

expect_error(suppressWarnings(
writeCodeBookLatex(
ds[c("allpets")],
title = "Hello",
subtitle = "Goodbye",
sample_desc = "US Voting Adults",
logo = "yougov",
pdf = TRUE)
), "Codebooks are designed to work with whole numbers")
})
2 changes: 1 addition & 1 deletion vignettes/Tracking-Recontact-Reports.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ There are a number of important elements here:

4. Many defaults are extracted from the first dataset in the `dataset_list`.

![](example-016-tracking-report.png "Tracking reports")
![Example tracking report](example-016-tracking-report.png "Tracking reports")

### Tracking Reports for Other Stacking Data

Expand Down

0 comments on commit 41e978b

Please sign in to comment.