Skip to content

Commit 774ee0f

Browse files
GH-41834: [R] Better error handling in dplyr code (#41576)
* GitHub Issue: #41834
1 parent 4a2df66 commit 774ee0f

38 files changed: +804 / -823 lines changed

r/R/dplyr-across.R

+5-1
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,11 @@ expand_across <- function(.data, quos_in, exclude_cols = NULL) {
3434
)
3535

3636
if (!all(names(across_call[-1]) %in% c(".cols", ".fns", ".names"))) {
37-
abort("`...` argument to `across()` is deprecated in dplyr and not supported in Arrow")
37+
arrow_not_supported(
38+
"`...` argument to `across()` is deprecated in dplyr and",
39+
body = c(">" = "Convert your call into a function or formula including the arguments"),
40+
call = rlang::caller_call()
41+
)
3842
}
3943

4044
if (!is.null(across_call[[".cols"]])) {

r/R/dplyr-arrange.R

+44-43
Original file line number | Diff line number | Diff line change
@@ -19,47 +19,46 @@
1919
# The following S3 methods are registered on load if dplyr is present
2020

2121
arrange.arrow_dplyr_query <- function(.data, ..., .by_group = FALSE) {
22-
call <- match.call()
23-
.data <- as_adq(.data)
24-
exprs <- expand_across(.data, quos(...))
22+
try_arrow_dplyr({
23+
.data <- as_adq(.data)
24+
exprs <- expand_across(.data, quos(...))
2525

26-
if (.by_group) {
27-
# when the data is grouped and .by_group is TRUE, order the result by
28-
# the grouping columns first
29-
exprs <- c(quos(!!!dplyr::groups(.data)), exprs)
30-
}
31-
if (length(exprs) == 0) {
32-
# Nothing to do
33-
return(.data)
34-
}
35-
.data <- as_adq(.data)
36-
# find and remove any dplyr::desc() and tidy-eval
37-
# the arrange expressions inside an Arrow data_mask
38-
sorts <- vector("list", length(exprs))
39-
descs <- logical(0)
40-
mask <- arrow_mask(.data)
41-
for (i in seq_along(exprs)) {
42-
x <- find_and_remove_desc(exprs[[i]])
43-
exprs[[i]] <- x[["quos"]]
44-
sorts[[i]] <- arrow_eval(exprs[[i]], mask)
45-
names(sorts)[i] <- format_expr(exprs[[i]])
46-
if (inherits(sorts[[i]], "try-error")) {
47-
msg <- paste("Expression", names(sorts)[i], "not supported in Arrow")
48-
return(abandon_ship(call, .data, msg))
26+
if (.by_group) {
27+
# when the data is grouped and .by_group is TRUE, order the result by
28+
# the grouping columns first
29+
exprs <- c(quos(!!!dplyr::groups(.data)), exprs)
4930
}
50-
if (length(mask$.aggregations)) {
51-
# dplyr lets you arrange on e.g. x < mean(x), but we haven't implemented it.
52-
# But we could, the same way it works in mutate() via join, if someone asks.
53-
# Until then, just error.
54-
# TODO: add a test for this
55-
msg <- paste("Expression", format_expr(expr), "not supported in arrange() in Arrow")
56-
return(abandon_ship(call, .data, msg))
31+
if (length(exprs) == 0) {
32+
# Nothing to do
33+
return(.data)
5734
}
58-
descs[i] <- x[["desc"]]
59-
}
60-
.data$arrange_vars <- c(sorts, .data$arrange_vars)
61-
.data$arrange_desc <- c(descs, .data$arrange_desc)
62-
.data
35+
.data <- as_adq(.data)
36+
# find and remove any dplyr::desc() and tidy-eval
37+
# the arrange expressions inside an Arrow data_mask
38+
sorts <- vector("list", length(exprs))
39+
descs <- logical(0)
40+
mask <- arrow_mask(.data)
41+
for (i in seq_along(exprs)) {
42+
x <- find_and_remove_desc(exprs[[i]])
43+
exprs[[i]] <- x[["quos"]]
44+
sorts[[i]] <- arrow_eval(exprs[[i]], mask)
45+
names(sorts)[i] <- format_expr(exprs[[i]])
46+
if (length(mask$.aggregations)) {
47+
# dplyr lets you arrange on e.g. x < mean(x), but we haven't implemented it.
48+
# But we could, the same way it works in mutate() via join, if someone asks.
49+
# Until then, just error.
50+
# TODO: add a test for this
51+
arrow_not_supported(
52+
.actual_msg = "Expression not supported in arrange() in Arrow",
53+
call = expr
54+
)
55+
}
56+
descs[i] <- x[["desc"]]
57+
}
58+
.data$arrange_vars <- c(sorts, .data$arrange_vars)
59+
.data$arrange_desc <- c(descs, .data$arrange_desc)
60+
.data
61+
})
6362
}
6463
arrange.Dataset <- arrange.ArrowTabular <- arrange.RecordBatchReader <- arrange.arrow_dplyr_query
6564

@@ -73,10 +72,9 @@ find_and_remove_desc <- function(quosure) {
7372
expr <- quo_get_expr(quosure)
7473
descending <- FALSE
7574
if (length(all.vars(expr)) < 1L) {
76-
stop(
77-
"Expression in arrange() does not contain any field names: ",
78-
deparse(expr),
79-
call. = FALSE
75+
validation_error(
76+
"Expression in arrange() does not contain any field names",
77+
call = quosure
8078
)
8179
}
8280
# Use a while loop to remove any number of nested pairs of enclosing
@@ -90,7 +88,10 @@ find_and_remove_desc <- function(quosure) {
9088
# ensure desc() has only one argument (when an R expression is a function
9189
# call, length == 2 means it has exactly one argument)
9290
if (length(expr) > 2) {
93-
stop("desc() expects only one argument", call. = FALSE)
91+
validation_error(
92+
"desc() expects only one argument",
93+
call = expr
94+
)
9495
}
9596
# remove desc() and toggle descending
9697
expr <- expr[[2]]

r/R/dplyr-datetime-helpers.R

+16-15
Original file line number | Diff line number | Diff line change
@@ -18,10 +18,10 @@
1818
check_time_locale <- function(locale = Sys.getlocale("LC_TIME")) {
1919
if (tolower(Sys.info()[["sysname"]]) == "windows" && locale != "C") {
2020
# MingW C++ std::locale only supports "C" and "POSIX"
21-
stop(paste0(
22-
"On Windows, time locales other than 'C' are not supported in Arrow. ",
23-
"Consider setting `Sys.setlocale('LC_TIME', 'C')`"
24-
))
21+
arrow_not_supported(
22+
"On Windows, time locales other than 'C'",
23+
body = c(">" = "Consider setting `Sys.setlocale('LC_TIME', 'C')`")
24+
)
2525
}
2626
locale
2727
}
@@ -56,13 +56,15 @@ duration_from_chunks <- function(chunks) {
5656
matched_chunks <- accepted_chunks[pmatch(names(chunks), accepted_chunks, duplicates.ok = TRUE)]
5757

5858
if (any(is.na(matched_chunks))) {
59-
abort(
60-
paste0(
61-
"named `difftime` units other than: ",
62-
oxford_paste(accepted_chunks, quote_symbol = "`"),
63-
" not supported in Arrow. \nInvalid `difftime` parts: ",
59+
arrow_not_supported(
60+
paste(
61+
"named `difftime` units other than:",
62+
oxford_paste(accepted_chunks, quote_symbol = "`")
63+
),
64+
body = c(i = paste(
65+
"Invalid `difftime` parts:",
6466
oxford_paste(names(chunks[is.na(matched_chunks)]), quote_symbol = "`")
65-
)
67+
))
6668
)
6769
}
6870

@@ -114,7 +116,6 @@ binding_as_date_character <- function(x,
114116
}
115117

116118
binding_as_date_numeric <- function(x, origin = "1970-01-01") {
117-
118119
# Arrow does not support direct casting from double to date32(), but for
119120
# integer-like values we can go via int32()
120121
# TODO: revisit after ARROW-15798
@@ -442,7 +443,7 @@ parse_period_unit <- function(x) {
442443
unit <- as.integer(pmatch(str_unit_start, known_units)) - 1L
443444

444445
if (any(is.na(unit))) {
445-
abort(
446+
validation_error(
446447
sprintf(
447448
"Invalid period name: '%s'",
448449
str_unit,
@@ -484,13 +485,13 @@ parse_period_unit <- function(x) {
484485
# more special cases: lubridate imposes sensible maximum
485486
# values on the number of seconds, minutes and hours
486487
if (unit == 3L && multiple > 60) {
487-
abort("Rounding with second > 60 is not supported")
488+
validation_error("Rounding with second > 60 is not supported")
488489
}
489490
if (unit == 4L && multiple > 60) {
490-
abort("Rounding with minute > 60 is not supported")
491+
validation_error("Rounding with minute > 60 is not supported")
491492
}
492493
if (unit == 5L && multiple > 24) {
493-
abort("Rounding with hour > 24 is not supported")
494+
validation_error("Rounding with hour > 24 is not supported")
494495
}
495496

496497
list(unit = unit, multiple = multiple)

0 commit comments

Comments
 (0)