Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
language: r
system.time(foo <- subset(myDF, x < 10))
user  system elapsed
3.035   0.846   4.260

system.time(foo <- subset(myDT, x < 10))
user  system elapsed
0.486   0.374   0.842

## compare the data.table way of subsetting
> system.time(foo <- subset(myDT, x < 10))
   user  system elapsed 
  0.126   0.059   0.185 
> system.time(foo2 <- myDT[x < 10])
   user  system elapsed 
  0.087   0.031   0.119 
  
## here is an example with grouping and keys
> grp <- sample(LETTERS[1:10], 10^7, replace = TRUE)
> myDT[, grp := grp]
> system.time(myDT[, as.list(coef(lm(y~x))), by = grp])
   user  system elapsed 
  1.349   0.176   1.390 
> setkey(myDT, grp)
> system.time(myDT[, as.list(coef(lm(y~x))), by = grp])
   user  system elapsed 
  1.206   0.156   1.349 

...

Speed differences using different pipes:

Code Block
library(bench)
library(data.table)
library(tidyverse)

# Benchmark fixtures: one set of 100,000 random normal draws, stored three
# ways so each pipeline runs on its native container.
df_df <- data.frame(x = rnorm(n = 1e5))

# Same data as a tibble (for the dplyr pipeline).
df_tbl <- tibble::as_tibble(df_df)

# Same data as a data.table (for the data.table pipeline).
df_dt <- data.table::as.data.table(df_df)

#' Base R "Bizarro pipe" pipeline: keep rows with negative `x`, then add 100.
#'
#' Each step stores its result in `.` via right-assignment (`->.;`) so the
#' next step can refer to it, mimicking a pipe without any package.
#'
#' @param df A data.frame with a numeric column `x`.
#' @return A data.frame holding the rows of `df` where `x < 0`, with 100
#'   added to every value.
r_pipe <- function(df) {
  df ->.;
    # Reference the column as `.$x`: a bare `x` inside `[.data.frame` is looked
    # up in the enclosing environments, not in the data frame. `drop = FALSE`
    # keeps a one-column result as a data.frame instead of a bare vector.
    .[.$x < 0, , drop = FALSE] ->.;
  `+`(., 100)
}

#' magrittr/dplyr pipeline: keep rows with negative `x`, then add 100 to `x`.
#'
#' @param df A data frame or tibble with a numeric column `x`.
#' @return The filtered rows of `df` with `x` replaced by `x + 100`.
dplyr_pipe <- function(df) {
  negatives <- df %>% filter(x < 0)
  negatives %>% mutate(x = x + 100)
}

#' data.table chained pipeline: keep rows with negative `x`, then add 100.
#'
#' @param df A data.table with a numeric column `x`.
#' @return A data.table with the rows of `df` where `x < 0` and `x`
#'   replaced by `x + 100`.
dt_pipe <- function(df) {
  # The arithmetic must sit in `j` (after the comma). Written as `[x + 100]`
  # it lands in `i`, where the values are used as row numbers instead of
  # being added to the column.
  # NOTE(review): `df[x < 0]` is expected to return a new table, so `:=`
  # should not alter the caller's `df` -- confirm against data.table docs.
  # The trailing `[]` makes the result visible (`:=` returns invisibly).
  df[x < 0][
    , x := x + 100][]
}

# Time the three pipelines (1000 iterations each) and list them from the
# smallest to the largest median. `check = FALSE` turns off bench's check
# that all expressions return equal results.
bench::mark(
  base = r_pipe(df_df),
  dplyr = dplyr_pipe(df_tbl),
  data.table = dt_pipe(df_dt),
  iterations = 1000,
  check = FALSE
) %>%
  dplyr::arrange(median) %>%
  dplyr::select(-result:-gc)
  
  # A tibble: 3 x 9
  expression      min   median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time
  <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl> <int> <dbl>   <bch:tm>
1 base        595.2us  651.1us     1498. 1013.08KB    0      1000     0   667.45ms
2 dplyr        1.36ms   1.66ms      562.    2.32MB    0.562   999     1      1.78s
3 data.table   4.32ms   5.59ms      163.    2.53MB    1.81    989    11      6.08s