The following codes should give you the fastest speed for big data as long as you have many cores on your computer:
if (!require("pacman")) install.packages("pacman")
pacman::p_load(doParallel, data.table, stringr)
# get the file name
dir() %>% str_subset("\\.csv$") -> fn
# use parallel setting
(cl <- detectCores() %>%
makeCluster()) %>%
registerDoParallel()
# read and bind all files together
system.time({
big_df <- foreach(
i = fn,
.packages = "data.table"
) %dopar%
{
fread(i, colClasses = "character")
} %>%
rbindlist(fill = TRUE)
})
# end of parallel work
stopImplicitCluster(cl)
Updated in 2020/04/16: As I find a new package available for parallel computation, an alternative solution is provided using the following codes.
if (!require("pacman")) install.packages("pacman")
pacman::p_load(future.apply, data.table, stringr)
# get the file name
dir() %>% str_subset("\\.csv$") -> fn
plan(multiprocess)
future_lapply(fn,fread,colClasses = "character") %>%
rbindlist(fill = TRUE) -> res
# res is the merged data.table