Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

1. `fread()` with `skip=0` and `(header=TRUE|FALSE)` no longer skips the first row when it has fewer fields than subsequent rows, [#7463](https://github.com/Rdatatable/data.table/issues/7463). Thanks @emayerhofer for the report and @ben-schwen for the fix.

2. `set()` now automatically pre-allocates new column slots if needed, similar to what `:=` already does, [#1831](https://github.com/Rdatatable/data.table/issues/1831) [#4100](https://github.com/Rdatatable/data.table/issues/4100). Thanks to @zachokeeffe and @tyner for the reports and @ben-schwen for the fix.

## data.table [v1.18.0](https://github.com/Rdatatable/data.table/milestone/37?closed=1) 23 December 2025

### BREAKING CHANGE
Expand Down
12 changes: 11 additions & 1 deletion R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -2854,10 +2854,20 @@ setcolorder = function(x, neworder=key(x), before=NULL, after=NULL, skip_absent=
invisible(x)
}

# Internal predicate: TRUE when setalloccol() should be run on `x` before `value` is assigned.
#   x     : the table about to be modified
#   value : the value about to be assigned (may be NULL, a list, or anything else)
# Returns a single TRUE/FALSE.
.set_needs_alloccol = function(x, value) {
# no spare column slots left, i.e. tl <= ncol (table is full or was loaded from disk): always allocate
out_of_slots = truelength(x) <= length(x)
if (out_of_slots) return(TRUE)
# spare slots exist and the self reference checks out: nothing to do
if (selfrefok(x, verbose=FALSE) >= 1L) return(FALSE)
# self reference is not ok: allocate only when value is NULL or a list holding at least one NULL
is.null(value) || (is.list(value) && any(vapply_1b(value, is.null)))
}

set = function(x,i=NULL,j,value) # low overhead, loopable
{
# If removing columns from a table that's not selfrefok, need to call setalloccol first, #7488
if ((is.null(value) || (is.list(value) && any(vapply_1b(value, is.null)))) && selfrefok(x, verbose=FALSE) < 1L) {
if (.set_needs_alloccol(x, value)) {
name = substitute(x)
setalloccol(x, verbose=FALSE)
if (is.name(name)) {
Expand Down
3 changes: 2 additions & 1 deletion inst/tests/froll.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -2084,7 +2084,8 @@ if (use.fork) {
test(6010.772, .selfref.ok(ans[[2L]]))
ans = frollapply(1:2, 2, function(x) list(data.table(x)), fill=list(data.table(NA)), simplify=FALSE)
test(6010.773, !.selfref.ok(ans[[2L]][[1L]]))
test(6010.7731, set(ans[[2L]][[1L]],, "newcol", 1L), error="data.table has either been loaded from disk")
# deactivated by #5443
# test(6010.7731, set(ans[[2L]][[1L]],, "newcol", 1L), error="data.table has either been loaded from disk")
ans = lapply(ans, lapply, setDT)
test(6010.774, .selfref.ok(ans[[2L]][[1L]])) ## fix after
ans = frollapply(1:2, 2, function(x) list(data.table(x)), fill=list(data.table(NA)), simplify=function(x) lapply(x, lapply, setDT))
Expand Down
13 changes: 11 additions & 2 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -14797,7 +14797,7 @@ test(2016.1, name, "DT")
test(2016.2, DT, data.table(a=1:3))
test(2016.3, DT[2,a:=4L], data.table(a=INT(1,4,3))) # no error for := when existing column
test(2016.4, set(DT,3L,1L,5L), data.table(a=INT(1,4,5))) # no error for set() when existing column
test(2016.5, set(DT,2L,"newCol",5L), error="either been loaded from disk.*or constructed manually.*Please run setDT.*setalloccol.*on it first") # just set()
test(2016.5, set(DT,2L,"newCol",5L), data.table(a=INT(1,4,5), newCol=INT(NA,5L,NA))) # works since set overallocates #4100
test(2016.6, DT[2,newCol:=6L], data.table(a=INT(1,4,5), newCol=INT(NA,6L,NA))) # := ok (it changes DT in caller)
unlink(tt)

Expand Down Expand Up @@ -19478,7 +19478,7 @@ test(2290.4, DT[, `:=`(a = 2, c := 3)], error="It looks like you re-used `:=` in
df = data.frame(a=1:3)
setDT(df)
attr(df, "att") = 1
test(2291.1, set(df, NULL, "new", "new"), error="either been loaded from disk.*or constructed manually.*Please run setDT.*setalloccol.*on it first")
test(2291.1, set(df, NULL, "new", "new"), setattr(data.table(a=1:3, new="new"), "att", 1)) # fixed when calling setalloccol before set #4100

# ns-qualified bysub error, #6493
DT = data.table(a = 1)
Expand Down Expand Up @@ -21959,3 +21959,12 @@ test(2355.1, fread(txt, skip=0), data.table(V1 = c("b1", "c1"), a1
test(2355.2, fread(txt, skip=0, header=TRUE), data.table(V1 = c("b1", "c1"), a1 = c("b2", "c2"), a2 = c("b3", "c3")), warning="Added an extra default column name")
test(2355.3, fread(txt, skip=0, header=FALSE), data.table(V1=character(), V2=character(), V3=character()), warning="Consider fill=TRUE")
test(2355.4, fread(txt, skip=0, fill=TRUE), data.table(V1 = c("a1", "b1", "c1"), V2 = c("a2", "b2", "c2"), V3 = c("", "b3", "c3")))

# re-overallocate in set if quota is reached #496 #1831 #4100
# every test below runs with datatable.alloccol=1L; presumably this keeps the over-allocation minimal so spare column slots run out quickly -- TODO confirm
# 2356.1: add 10 columns one at a time with set() to an initially empty data.table; all 10 must be present afterwards
DT = data.table()
test(2356.1, options=c(datatable.alloccol=1L), {for (i in seq(10L)) set(DT, j = paste0("V",i), value = i); ncol(DT)}, 10L)
# 2356.2: table built by hand with structure() instead of data.table(); set() must still be able to add column "c"
DT = structure(list(a = 1, b = 2), class = c("data.table", "data.frame"))
test(2356.2, options=c(datatable.alloccol=1L), set(DT, j="c", value=3), data.table(a=1, b=2, c=3))
# ensure := and set are consistent if they need to overallocate
# 2356.3: the same 10 columns added via set() (DT) and via := (DT2) must give equal tables
DT = data.table(); DT2 = data.table()
test(2356.3, options=c(datatable.alloccol=1L), {for (i in seq(10L)) set(DT, j = sprintf("V%d",i), value = i); DT}, {for (i in seq(10)) DT2[, sprintf("V%d",i) := i]; DT2})
Loading