From febb18f01248192f99e2d9ff10fc3f4da8ed2e64 Mon Sep 17 00:00:00 2001 From: JaseZiv Date: Tue, 7 May 2024 20:24:26 +1000 Subject: [PATCH] addressing #375 --- DESCRIPTION | 2 +- NEWS.md | 7 +++- R/tm_player_injury_history.R | 81 +++++++++++++++++------------------- 3 files changed, 45 insertions(+), 45 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 02059662..21fcddc0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: worldfootballR Title: Extract and Clean World Football (Soccer) Data -Version: 0.6.5.0003 +Version: 0.6.5.0004 Authors@R: c( person("Jason", "Zivkovic", , "jaseziv83@gmail.com", role = c("aut", "cre", "cph")), person("Tony", "ElHabr", , "anthonyelhabr@gmail.com", role = "ctb"), diff --git a/NEWS.md b/NEWS.md index b36b2f4a..cac2250d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,9 +4,14 @@ * `fb_league_stats()` not returning `opponent` table. (0.6.5.0001) [#355](https://github.com/JaseZiv/worldfootballR/issues/355) * `tm_player_bio()` not returning values in the `player_valuation`, `max_player_valuation` and `max_player_valuation_date` fields. Unfortunately, `max_player_valuation` and `max_player_valuation_date` fields are no able to be scraped at this release (0.6.5.0002) [#357](https://github.com/JaseZiv/worldfootballR/issues/357) - * `fb_league_stats()` not returning `player` table when hidden on page load. 
(0.6.5.0003) [#351](https://github.com/JaseZiv/worldfootballR/issues/351) + +### Breaking Changes + +* In addressing the issue with `tm_player_injury_history()` in [#375](https://github.com/JaseZiv/worldfootballR/issues/375), the previously named column `club` has been renamed `club_missed_games_for` to better represent that this column will contain the club the player missed games for, as previously this column could have been misunderstood to be who they were playing for when they were injured (0.6.5.0004) + + *** # worldfootballR 0.6.5 diff --git a/R/tm_player_injury_history.R b/R/tm_player_injury_history.R index fbff1611..f6c1fde2 100644 --- a/R/tm_player_injury_history.R +++ b/R/tm_player_injury_history.R @@ -47,49 +47,44 @@ tm_player_injury_history <- function(player_urls) { pg <- xml2::read_html(page_url) %>% rvest::html_nodes("#yw1") %>% rvest::html_nodes("tbody") %>% .[[1]] %>% rvest::html_children() } - season_injured <- tryCatch(pg %>% rvest::html_nodes("td:nth-child(1)") %>% rvest::html_text(), - error = function(e) season_injured <- NA_character_) %>% .replace_empty_na() - injury <- tryCatch(pg %>% rvest::html_nodes(".zentriert+ .hauptlink") %>% rvest::html_text(), - error = function(e) injury <- NA_character_) %>% .replace_empty_na() - injured_since <- tryCatch(pg %>% rvest::html_nodes(".hauptlink+ .zentriert") %>% rvest::html_text() %>% .tm_fix_dates(), - error = function(e) injured_since <- NA_character_) %>% .replace_empty_na() - injured_until <- tryCatch(pg %>% rvest::html_nodes(".zentriert+ td.zentriert") %>% rvest::html_text() %>% .tm_fix_dates(), - error = function(e) injured_until <- NA_character_) %>% .replace_empty_na() - duration <- tryCatch(pg %>% rvest::html_nodes(".zentriert+ td.rechts") %>% rvest::html_text(), - error = function(e) duration <- NA_character_) %>% .replace_empty_na() - games_missed <- tryCatch(pg %>% rvest::html_nodes(".wappen_verletzung") %>% rvest::html_text() %>% as.numeric() %>% suppressWarnings(), - error = 
function(e) games_missed <- NA_integer_) %>% .replace_empty_na() - club <- tryCatch(pg %>% rvest::html_nodes("img") %>% rvest::html_attr("alt"), - error = function(e) goals <- NA_character_) %>% .replace_empty_na() - - - out_df <- cbind(player_name, player_url, season_injured, injury, injured_since, injured_until, duration, games_missed, club) %>% - suppressWarnings() %>% data.frame() - - out_df <- out_df %>% - dplyr::mutate(player_name = as.character(.data[["player_name"]]), - player_url = as.character(.data[["player_url"]]), - season_injured = as.character(.data[["season_injured"]]), - injury = as.character(.data[["injury"]]), - injured_since = lubridate::ymd(.data[["injured_since"]]), - injured_until = lubridate::ymd(.data[["injured_until"]]), - duration = as.character(.data[["duration"]]), - games_missed = as.character(.data[["games_missed"]]), - club = as.character(.data[["club"]])) - - # # ----- use the below if want to include player bio data to injury histories -----# - # - # out_df <- cbind(season_injured, injury, injured_since, injured_until, duration, games_missed, club) %>% - # suppressWarnings() %>% data.frame() - - # out_df <- out_df %>% - # dplyr::mutate(season_injured = as.character(.data[["season_injured"]]), - # injury = as.character(.data[["injury"]]), - # injured_since = lubridate::ymd(.data[["injured_since"]]), - # injured_until = lubridate::ymd(.data[["injured_until"]]), - # duration = as.character(.data[["duration"]]), - # games_missed = as.character(.data[["games_missed"]]), - # club = as.character(.data[["club"]])) + get_each_tab_row <- function(pg_row) { + + season_injured <- tryCatch(pg_row %>% rvest::html_nodes("td:nth-child(1)") %>% rvest::html_text(), + error = function(e) season_injured <- NA_character_) %>% .replace_empty_na() + injury <- tryCatch(pg_row %>% rvest::html_nodes(".zentriert+ .hauptlink") %>% rvest::html_text(), + error = function(e) injury <- NA_character_) %>% .replace_empty_na() + injured_since <- tryCatch(pg_row %>% 
rvest::html_nodes(".hauptlink+ .zentriert") %>% rvest::html_text() %>% .tm_fix_dates(), + error = function(e) injured_since <- NA_character_) %>% .replace_empty_na() + injured_until <- tryCatch(pg_row %>% rvest::html_nodes(".zentriert+ td.zentriert") %>% rvest::html_text() %>% .tm_fix_dates(), + error = function(e) injured_until <- NA_character_) %>% .replace_empty_na() + duration <- tryCatch(pg_row %>% rvest::html_nodes(".zentriert+ td.rechts") %>% rvest::html_text(), + error = function(e) duration <- NA_character_) %>% .replace_empty_na() + games_missed <- tryCatch(pg_row %>% rvest::html_nodes(".wappen_verletzung") %>% rvest::html_text() %>% as.numeric() %>% suppressWarnings(), + error = function(e) games_missed <- NA_integer_) %>% .replace_empty_na() + club_missed_games_for <- tryCatch(pg_row %>% rvest::html_nodes("img") %>% rvest::html_attr("alt"), + error = function(e) goals <- NA_character_) %>% .replace_empty_na() |> paste0(collapse = ", ") + + + df <- cbind(player_name, player_url, season_injured, injury, injured_since, injured_until, duration, games_missed, club_missed_games_for) %>% + suppressWarnings() %>% data.frame() + + df <- df %>% + dplyr::mutate(player_name = as.character(.data[["player_name"]]), + player_url = as.character(.data[["player_url"]]), + season_injured = as.character(.data[["season_injured"]]), + injury = as.character(.data[["injury"]]), + injured_since = lubridate::ymd(.data[["injured_since"]]), + injured_until = lubridate::ymd(.data[["injured_until"]]), + duration = as.character(.data[["duration"]]), + games_missed = as.character(.data[["games_missed"]]), + club_missed_games_for = as.character(.data[["club_missed_games_for"]])) + + return(df) + } + + out_df <- pg |> + purrr::map_dfr(get_each_tab_row) + return(out_df) }