...
Code Block | ||
---|---|---|
| ||
system.time(foo <- subset(myDF, x < 10)) user system elapsed 3.035 0.846 4.260 system.time(foo <- subset(myDT, x < 10)) user system elapsed 0.486 0.374 0.842 ## compare the data.table way of subsetting > system.time(foo <- subset(myDT, x < 10)) user system elapsed 0.258126 0.044059 0.195185 > system.time(foo2 <- myDT[x < 10]) user system elapsed 0.214087 0.040031 0.148 119 ## here is an example with grouping and keys > grp <- sample(LETTERS[1:10], 10^7, replace = TRUE) > myDT[, grp := grp] > system.time(myDT[, as.list(coef(lm(y~x))), by = grp]) user system elapsed 1.349 0.176 1.390 > setkey(myDT, grp) > system.time(myDT[, as.list(coef(lm(y~x))), by = grp]) user system elapsed 1.206 0.156 1.349 |
...
Speed differences using different pipes:
Code Block |
---|
library(bench)
library(data.table)
library(tidyverse)
# Draw one sample of 100,000 standard-normal values and hold it in each of the
# three table classes under comparison, so every contender sees the same data.
df_df <- data.frame(x = rnorm(1e5))
df_dt <- as.data.table(df_df)
df_tbl <- as_tibble(df_df)
# Base-R contender, written with the "Bizarro pipe" (`->.;`): each step stores
# its result in `.` so that the next step can pick it up from there.
#
# df: a data.frame with a numeric column `x`.
# Returns a data.frame holding the rows of `df` where x < 0, with 100 added by
# the arithmetic `+` (which applies to every column, so all must be numeric).
r_pipe <- function(df) {
  df ->.;
  # `[.data.frame` does not look column names up inside the frame, so the
  # column has to be spelled `.$x`; a bare `x` resolves to whatever `x` happens
  # to exist in the calling session, or fails with "object 'x' not found".
  # drop = FALSE keeps a one-column input as a data.frame instead of letting
  # it collapse to a bare vector.
  .[.$x < 0, , drop = FALSE] ->.;
  `+`(., 100)
}
# dplyr contender: the filter and the shift are chained with magrittr's `%>%`.
#
# df: a data frame (a tibble in the benchmark) with a numeric column `x`.
# Returns the rows where x < 0, with `x` replaced by x + 100.
dplyr_pipe <- function(df) {
  df %>%
    filter(x < 0) %>%
    mutate(x = x + 100)
}
# data.table contender: the two steps are chained as consecutive `[` calls.
#
# df: a data.table with a numeric column `x`.
# Returns a data.table holding the rows where x < 0, with `x` replaced by
# x + 100. The table passed in is left unmodified.
dt_pipe <- function(df) {
  # The shift must sit in the `j` slot. Written as `[x + 100]` the expression
  # lands in `i`, where data.table treats the numbers as row positions and
  # returns arbitrary rows instead of adding 100.
  # `:=` updates by reference, but it acts on the new table produced by the
  # `x < 0` subset, not on the caller's `df`.
  # NOTE: timings recorded with the `[x + 100]` form measured a row lookup;
  # re-run the benchmark to refresh them.
  df[x < 0, ][
    , x := x + 100]
}
# Run each contender 1000 times on its own copy of the sample, then list the
# contenders from fastest to slowest median and drop the bulky list columns
# (`result` through `gc`) so the summary prints compactly.
# check = FALSE: the contenders are not required to return identical objects.
bench::mark(
  base = r_pipe(df_df),
  dplyr = dplyr_pipe(df_tbl),
  data.table = dt_pipe(df_dt),
  iterations = 1000,
  check = FALSE
) %>%
  arrange(median) %>%
  select(-result:-gc)
# A tibble: 3 x 9
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm>
1 base 595.2us 651.1us 1498. 1013.08KB 0 1000 0 667.45ms
2 dplyr 1.36ms 1.66ms 562. 2.32MB 0.562 999 1 1.78s
3 data.table 4.32ms 5.59ms 163. 2.53MB 1.81 989 11 6.08s |