Skip to content

Commit 4a5c24e

Browse files
committed
Add the data.frame option, and a new test for it and predict
1 parent fe7117f commit 4a5c24e

19 files changed

+779
-495
lines changed

.gitignore

+5
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,8 @@
33
*.so
44
noweb/code.nw
55
noweb/code.tex
6+
noweb/code.aux
7+
noweb/code.log
8+
noweb/code.out
9+
noweb/code.toc
10+
noweb/noweb.sty

R/coxph.R

+2-2
Original file line numberDiff line numberDiff line change
@@ -276,8 +276,8 @@ coxph <- function(formula, data, weights, subset, na.action,
276276
pvars <- attr(Terms, 'predvars')
277277
pmethod <- sub("makepredictcall.", "", as.vector(methods("makepredictcall")))
278278
for (i in 1:ntrans) {
279-
newtt <- (tt[[i]])(mf[[timetrans$vars[i]]], Y[,1], istrat, weights)
280-
mf[[timetrans$vars[i]]] <- newtt
279+
newtt <- (tt[[i]])(mf[[timetrans$var[i]]], Y[,1], istrat, weights)
280+
mf[[timetrans$var[i]]] <- newtt
281281
nclass <- class(newtt)
282282
if (any(nclass %in% pmethod)) { # It has a makepredictcall method
283283
dummy <- as.call(list(as.name(class(newtt)[1]), tcall[[i]][[2]]))

R/predict.coxph.R

+17-22
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@ predict.coxph <- function(object, newdata,
1111
type <-match.arg(type)
1212
if (type=="survival") {
1313
survival <- TRUE
14-
type <- "expected" #this is to stop lots of "or" statements
14+
type <- "expected" # survival and expecte have nearly the same code path
1515
}
1616
else survival <- FALSE
17+
if (type == "expected") reference <- "sample" # a common ref is easiest
1718

1819
n <- object$n
1920
Terms <- object$terms
@@ -179,60 +180,54 @@ predict.coxph <- function(object, newdata,
179180
afit.n <- length(afit$time)
180181
if (missing(newdata)) {
181182
# In this case we need se.fit, nothing else
182-
j1 <- approx(afit$time, 1:afit.n, y[indx,1], method='constant',
183-
f=0, yleft=0, yright=afit.n)$y
183+
j1 <- findInterval(y[indx,1], afit$time)
184184
chaz <- c(0, afit$cumhaz)[j1 +1]
185185
varh <- c(0, cumsum(afit$varhaz))[j1 +1]
186186
xbar <- rbind(0, afit$xbar)[j1+1,,drop=F]
187187
if (ncol(y)==2) {
188188
dt <- (chaz * x[indx,]) - xbar
189189
se[indx] <- sqrt(varh + rowSums((dt %*% object$var) *dt)) *
190190
risk[indx]
191-
}
191+
}
192192
else {
193-
j2 <- approx(afit$time, 1:afit.n, y[indx,2], method='constant',
194-
f=0, yleft=0, yright=afit.n)$y
195-
chaz2 <- c(0, afit$cumhaz)[j2 +1]
196-
varh2 <- c(0, cumsum(afit$varhaz))[j2 +1]
197-
xbar2 <- rbind(0, afit$xbar)[j2+1,,drop=F]
193+
j2 <- findInterval(y[indx,2], afit$time)
194+
chaz2 <- c(0, afit$cumhaz)[j2 +1L]
195+
varh2 <- c(0, cumsum(afit$varhaz))[j2 +1L]
196+
xbar2 <- rbind(0, afit$xbar)[j2+ 1L,,drop=F]
198197
dt <- (chaz * x[indx,]) - xbar
199198
v1 <- varh + rowSums((dt %*% object$var) *dt)
200199
dt2 <- (chaz2 * x[indx,]) - xbar2
201200
v2 <- varh2 + rowSums((dt2 %*% object$var) *dt2)
202201
se[indx] <- sqrt(v2-v1)* risk[indx]
203-
}
204202
}
203+
}
205204

206205
else {
207206
#there is new data
208207
use.x <- TRUE
209208
indx2 <- which(newstrat == i)
210-
j1 <- approx(afit$time, 1:afit.n, newy[indx2,1],
211-
method='constant', f=0, yleft=0, yright=afit.n)$y
209+
j1 <- findInterval(newy[indx2,1], afit$time)
212210
chaz <-c(0, afit$cumhaz)[j1+1]
213211
pred[indx2] <- chaz * newrisk[indx2]
214212
if (se.fit) {
215213
varh <- c(0, cumsum(afit$varhaz))[j1+1]
216214
xbar <- rbind(0, afit$xbar)[j1+1,,drop=F]
217-
}
215+
}
218216
if (ncol(y)==2) {
219217
if (se.fit) {
220218
dt <- (chaz * newx[indx2,]) - xbar
221219
se[indx2] <- sqrt(varh + rowSums((dt %*% object$var) *dt)) *
222220
newrisk[indx2]
223-
}
224221
}
222+
}
225223
else {
226-
j2 <- approx(afit$time, 1:afit.n, newy[indx2,2],
227-
method='constant', f=0, yleft=0, yright=afit.n)$y
228-
chaz2 <- approx(-afit$time, afit$cumhaz, -newy[indx2,2],
229-
method="constant", rule=2, f=0)$y
230-
chaz2 <-c(0, afit$cumhaz)[j2+1]
224+
j2 <- findInterval(newy[indx2,2], afit$time)
225+
chaz2 <-c(0, afit$cumhaz)[j2+1L]
231226
pred[indx2] <- (chaz2 - chaz) * newrisk[indx2]
232-
227+
233228
if (se.fit) {
234-
varh2 <- c(0, cumsum(afit$varhaz))[j2+1]
235-
xbar2 <- rbind(0, afit$xbar)[j2+1,,drop=F]
229+
varh2 <- c(0, cumsum(afit$varhaz))[j2 +1L]
230+
xbar2 <- rbind(0, afit$xbar)[j2 + 1L,,drop=F]
236231
dt <- (chaz * newx[indx2,]) - xbar
237232
dt2 <- (chaz2 * newx[indx2,]) - xbar2
238233

R/statefig.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ statefig <- function(layout, connect, margin=.03, box=TRUE,
4545
cbox <- matrix(0, ncol=2, nrow=nstate) #coordinates will be here
4646
n <- length(layout)
4747

48-
ix <- rep(seq_along(layout), layout)
48+
ix <- rep(seq(along=layout), layout)
4949
if (is.vector(layout) || ncol(layout)> 1) { #left to right
5050
cbox[,1] <- space(n)[ix]
5151
for (i in 1:n) cbox[ix==i,2] <- 1 -space(layout[i])

R/summary.survfit.R

+219
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
# Summary function for survfit and survfit.coxph objects
2+
summary.survfit <- function(object, times, censored=FALSE,
3+
scale=1, extend=FALSE,
4+
rmean=getOption('survfit.rmean'),
5+
data.frame= FALSE,
6+
...) {
7+
fit <- object # I get tired of typing "object"
8+
if (!inherits(fit, 'survfit'))
9+
stop("summary.survfit can only be used for survfit",
10+
" and survfit.coxph objects")
11+
if (is.null(fit$logse)) fit$logse <- TRUE #older style objects lack this
12+
13+
# The print.rmean option is depreciated, it is still listened
14+
# to in print.survfit, but ignored here
15+
if (is.null(rmean)) rmean <- "common"
16+
if (is.numeric(rmean)) {
17+
if (is.null(fit$start.time)) {
18+
if (rmean < min(fit$time))
19+
stop("Truncation point for the mean time in state is < smallest survival")
20+
}
21+
else if (rmean < fit$start.time)
22+
stop("Truncation point for the mean time in state is < smallest survival")
23+
}
24+
else {
25+
rmean <- match.arg(rmean, c('none', 'common', 'individual'))
26+
if (length(rmean)==0) stop("Invalid value for rmean option")
27+
}
28+
29+
fit0 <- survfit0(fit)
30+
if (!data.frame) {
31+
# adding time 0 makes the mean and median easier
32+
temp <- survmean(fit0, scale=scale, rmean)
33+
table <- temp$matrix #for inclusion in the output list
34+
rmean.endtime <- temp$end.time
35+
}
36+
37+
if (!is.null(fit$strata)) {
38+
nstrat <- length(fit$strata)
39+
} else nstrat <- 1
40+
41+
# If times is present, then n.event, n.censor, and n.enter are summed
42+
# between those time points. Utility function to do that
43+
delta <- function(x, indx) { # sums between chosen times
44+
if (is.logical(indx)) indx <- which(indx)
45+
if (!is.null(x) && length(indx) >0) {
46+
fx <- function(x, indx) diff(c(0, c(0, cumsum(x))[indx+1]))
47+
if (is.matrix(x)) {
48+
temp <- apply(x, 2, fx, indx=indx)
49+
# don't return a vector when only 1 time point is given
50+
if (is.matrix(temp)) temp else matrix(temp, nrow=1)
51+
}
52+
else fx(x, indx)
53+
}
54+
else NULL
55+
}
56+
57+
# called for each component of the curve that has a time dimension
58+
# and is not summed
59+
ssub<- function(x, indx) { #select an object and index
60+
if (is.logical(indx)) indx <- which(indx)
61+
if (!is.null(x) && length(indx)>0) {
62+
if (is.matrix(x)) x[pmax(1,indx),,drop=FALSE]
63+
else if (is.array(x)) x[pmax(1,indx),,,drop=FALSE]
64+
else x[pmax(1, indx)]
65+
}
66+
else NULL
67+
}
68+
69+
# By replacing components of fit, summary.surfit inherits several bits
70+
if (missing(times)) {
71+
if (!censored) { # do not retain time points with no events
72+
index <- fit$n.event >0
73+
for (i in c("time","n.risk", "n.event", "surv", "std.err",
74+
"upper", "lower", "cumhaz", "std.chaz")) {
75+
if (!is.null(fit[[i]])) { # not all components in all objects
76+
fit[[i]] <- ssub(fit[[i]], index)
77+
}
78+
}
79+
80+
# The n.enter and n.censor values are accumualated
81+
# both of these are simple vectors
82+
if (is.null(fit$strata)) {
83+
for (i in c("n.enter", "n.censor"))
84+
if (!is.null(fit[[i]]))
85+
fit[[i]] <- delta(fit[[i]], index)
86+
}
87+
else {
88+
sindx <- rep(1:nstrat, fit$strata)
89+
for (i in c("n.enter", "n.censor")) {
90+
if (!is.null(fit[[i]]))
91+
fit[[i]] <- unlist(sapply(1:nstrat, function(j)
92+
delta(fit[[i]][sindx==j], index[sindx==j])))
93+
}
94+
# the "factor" is needed for the case that a strata has no
95+
# events at all, only censored and hence 0 lines of output
96+
# the [] retains the original names
97+
fit$strata[] <- as.vector(table(factor(sindx[index], 1:nstrat)))
98+
}
99+
}
100+
#if missing(times) and censored=TRUE, the fit object is ok as it is
101+
}
102+
else {
103+
if (length(times) ==0) stop("no values in times vector")
104+
if (inherits(times, "Date")) times <- as.numeric(times) # allow Dates
105+
if (!is.numeric(times)) stop("times must be a numeric vector")
106+
if (!all(is.finite(times))) stop("times contains missing or infinite values")
107+
times <- unique(sort(times))
108+
fit <- fit0 # findrow() needs the starting time
109+
110+
# findrow is called once per stratum
111+
# times will be the user specified times
112+
# returned is a subset of the rows for the stratum
113+
# We have to deal with user specified times that are before the first
114+
# time value in the curve or after the last, which is easier done one
115+
# curve at at time
116+
findrow <- function(fit, times, extend) {
117+
if (!extend) {
118+
maxtime <- max(fit$time)
119+
times <- times[times <= maxtime]
120+
}
121+
ntime <- length(fit$time)
122+
if (ntime ==0) {
123+
if (data.frame) return(list(time = times))
124+
else stop("no points selected for one or more curves,",
125+
" data error (?) or consider using the extend argument")
126+
}
127+
128+
index1 <- findInterval(times, fit$time)
129+
index2 <- 1 + findInterval(times, fit$time, left.open=TRUE)
130+
131+
fit$time <- times
132+
for (i in c("surv", "upper", "lower", "std.err", "cumhaz",
133+
"std.chaz")) {
134+
if (!is.null(fit[[i]])) fit[[i]] <- ssub(fit[[i]], index1)
135+
}
136+
137+
# Every observation in the data has to end with a censor or event.
138+
# So by definition the number at risk after the last observed time
139+
# value must be 0.
140+
fit$n.risk <- c(fit$n.risk, 0)[index2]
141+
142+
for (i in c("n.event", "n.censor", "n.enter"))
143+
fit[[i]] <- delta(fit[[i]], index1)
144+
fit
145+
}
146+
147+
if (nstrat ==1) fit <- findrow(fit, times, extend)
148+
else {
149+
ltemp <- vector("list", nstrat)
150+
if (length(dim(fit)) > 1) {
151+
for (i in 1:nstrat)
152+
ltemp[[i]] <- findrow(fit[i,], times, extend)
153+
} else {
154+
for (i in 1:nstrat) ltemp[[i]] <- findrow(fit[i], times, extend)
155+
}
156+
157+
# now stack them: time= c(time for curve 1, time for curve 2, etc)
158+
# and so on for all components
159+
unlistsurv <- function(x, name) {
160+
temp <- lapply(x, function(x) x[[name]])
161+
if (is.vector(temp[[1]])) unlist(temp)
162+
else if (is.matrix(temp[[1]])) do.call("rbind", temp)
163+
}
164+
165+
# unlist all the components built by a set of calls to findrow
166+
# and remake the strata
167+
keep <- c("time", "surv", "upper", "lower", "std.err",
168+
"cumhaz", "n.risk", "n.event", "n.censor", "n.enter",
169+
"std.chaz")
170+
for (i in keep)
171+
if (!is.null(fit[[i]])) fit[[i]] <- unlistsurv(ltemp, i)
172+
fit$strata[] <- sapply(ltemp, function(x) length(x$time))
173+
}
174+
}
175+
176+
# finish off the output structure
177+
# A survfit object may contain std(log S) or std(S), summary always std(S)
178+
if (!is.null(fit$std.err) && fit$logse)
179+
fit$std.err <- fit$std.err * fit$surv
180+
if (scale != 1) {
181+
# fix scale in the output
182+
fit$time <- fit$time/scale
183+
}
184+
185+
if (data.frame) {
186+
fit <- unclass(fit) # toss the survfit class
187+
indx <- match(c("time", "n.risk", "n.event", "n.censor",
188+
"surv", "cumhaz", "std.err", "std.chaz",
189+
"lower", "upper"), names(fit), nomatch=0)
190+
if (!is.null(fit$strata))
191+
newstrat <- factor(rep(1:nstrat, fit$strata), 1:nstrat,
192+
labels= names(fit$strata))
193+
if (is.matrix(fit$surv)) { # survfit.coxph object
194+
nc <- ncol(fit$surv)
195+
ndata <- lapply(fit[indx], function(x) {
196+
if (length(x)==0) NULL
197+
else if (is.matrix(x)) c(x)
198+
else rep(x, nc)})
199+
ndata <- data.frame(ndata)
200+
if (!is.null(fit$strata))
201+
ndata$strata <- rep(newstrat, nc)
202+
ndata$data <- rep(1:nc, each= length(fit$time))
203+
} else {
204+
ndata <- data.frame(fit[indx])
205+
if (!is.null(fit$strata)) ndata$strata <- newstrat
206+
}
207+
ndata
208+
} else {
209+
fit$table <- table
210+
if (length(rmean.endtime)>0 && !any(is.na(rmean.endtime[1])))
211+
fit$rmean.endtime <- rmean.endtime
212+
# Expand the strata. It has used 1,2,3 for a long while
213+
if (!is.null(fit$strata))
214+
fit$strata <- factor(rep(1:nstrat, fit$strata), 1:nstrat,
215+
labels= names(fit$strata))
216+
class(fit) <- "summary.survfit"
217+
fit
218+
}
219+
}

0 commit comments

Comments
 (0)