|
| 1 | +# Summary function for survfit and survfit.coxph objects |
| 2 | +summary.survfit <- function(object, times, censored=FALSE, |
| 3 | + scale=1, extend=FALSE, |
| 4 | + rmean=getOption('survfit.rmean'), |
| 5 | + data.frame= FALSE, |
| 6 | + ...) { |
| 7 | + fit <- object # I get tired of typing "object" |
| 8 | + if (!inherits(fit, 'survfit')) |
| 9 | + stop("summary.survfit can only be used for survfit", |
| 10 | + " and survfit.coxph objects") |
| 11 | + if (is.null(fit$logse)) fit$logse <- TRUE #older style objects lack this |
| 12 | + |
| 13 | + # The print.rmean option is depreciated, it is still listened |
| 14 | + # to in print.survfit, but ignored here |
| 15 | + if (is.null(rmean)) rmean <- "common" |
| 16 | + if (is.numeric(rmean)) { |
| 17 | + if (is.null(fit$start.time)) { |
| 18 | + if (rmean < min(fit$time)) |
| 19 | + stop("Truncation point for the mean time in state is < smallest survival") |
| 20 | + } |
| 21 | + else if (rmean < fit$start.time) |
| 22 | + stop("Truncation point for the mean time in state is < smallest survival") |
| 23 | + } |
| 24 | + else { |
| 25 | + rmean <- match.arg(rmean, c('none', 'common', 'individual')) |
| 26 | + if (length(rmean)==0) stop("Invalid value for rmean option") |
| 27 | + } |
| 28 | + |
| 29 | + fit0 <- survfit0(fit) |
| 30 | + if (!data.frame) { |
| 31 | + # adding time 0 makes the mean and median easier |
| 32 | + temp <- survmean(fit0, scale=scale, rmean) |
| 33 | + table <- temp$matrix #for inclusion in the output list |
| 34 | + rmean.endtime <- temp$end.time |
| 35 | + } |
| 36 | + |
| 37 | + if (!is.null(fit$strata)) { |
| 38 | + nstrat <- length(fit$strata) |
| 39 | + } else nstrat <- 1 |
| 40 | + |
| 41 | + # If times is present, then n.event, n.censor, and n.enter are summed |
| 42 | + # between those time points. Utility function to do that |
| 43 | + delta <- function(x, indx) { # sums between chosen times |
| 44 | + if (is.logical(indx)) indx <- which(indx) |
| 45 | + if (!is.null(x) && length(indx) >0) { |
| 46 | + fx <- function(x, indx) diff(c(0, c(0, cumsum(x))[indx+1])) |
| 47 | + if (is.matrix(x)) { |
| 48 | + temp <- apply(x, 2, fx, indx=indx) |
| 49 | + # don't return a vector when only 1 time point is given |
| 50 | + if (is.matrix(temp)) temp else matrix(temp, nrow=1) |
| 51 | + } |
| 52 | + else fx(x, indx) |
| 53 | + } |
| 54 | + else NULL |
| 55 | + } |
| 56 | + |
| 57 | + # called for each component of the curve that has a time dimension |
| 58 | + # and is not summed |
| 59 | + ssub<- function(x, indx) { #select an object and index |
| 60 | + if (is.logical(indx)) indx <- which(indx) |
| 61 | + if (!is.null(x) && length(indx)>0) { |
| 62 | + if (is.matrix(x)) x[pmax(1,indx),,drop=FALSE] |
| 63 | + else if (is.array(x)) x[pmax(1,indx),,,drop=FALSE] |
| 64 | + else x[pmax(1, indx)] |
| 65 | + } |
| 66 | + else NULL |
| 67 | + } |
| 68 | + |
| 69 | + # By replacing components of fit, summary.surfit inherits several bits |
| 70 | + if (missing(times)) { |
| 71 | + if (!censored) { # do not retain time points with no events |
| 72 | + index <- fit$n.event >0 |
| 73 | + for (i in c("time","n.risk", "n.event", "surv", "std.err", |
| 74 | + "upper", "lower", "cumhaz", "std.chaz")) { |
| 75 | + if (!is.null(fit[[i]])) { # not all components in all objects |
| 76 | + fit[[i]] <- ssub(fit[[i]], index) |
| 77 | + } |
| 78 | + } |
| 79 | + |
| 80 | + # The n.enter and n.censor values are accumualated |
| 81 | + # both of these are simple vectors |
| 82 | + if (is.null(fit$strata)) { |
| 83 | + for (i in c("n.enter", "n.censor")) |
| 84 | + if (!is.null(fit[[i]])) |
| 85 | + fit[[i]] <- delta(fit[[i]], index) |
| 86 | + } |
| 87 | + else { |
| 88 | + sindx <- rep(1:nstrat, fit$strata) |
| 89 | + for (i in c("n.enter", "n.censor")) { |
| 90 | + if (!is.null(fit[[i]])) |
| 91 | + fit[[i]] <- unlist(sapply(1:nstrat, function(j) |
| 92 | + delta(fit[[i]][sindx==j], index[sindx==j]))) |
| 93 | + } |
| 94 | + # the "factor" is needed for the case that a strata has no |
| 95 | + # events at all, only censored and hence 0 lines of output |
| 96 | + # the [] retains the original names |
| 97 | + fit$strata[] <- as.vector(table(factor(sindx[index], 1:nstrat))) |
| 98 | + } |
| 99 | + } |
| 100 | + #if missing(times) and censored=TRUE, the fit object is ok as it is |
| 101 | + } |
| 102 | + else { |
| 103 | + if (length(times) ==0) stop("no values in times vector") |
| 104 | + if (inherits(times, "Date")) times <- as.numeric(times) # allow Dates |
| 105 | + if (!is.numeric(times)) stop("times must be a numeric vector") |
| 106 | + if (!all(is.finite(times))) stop("times contains missing or infinite values") |
| 107 | + times <- unique(sort(times)) |
| 108 | + fit <- fit0 # findrow() needs the starting time |
| 109 | + |
| 110 | + # findrow is called once per stratum |
| 111 | + # times will be the user specified times |
| 112 | + # returned is a subset of the rows for the stratum |
| 113 | + # We have to deal with user specified times that are before the first |
| 114 | + # time value in the curve or after the last, which is easier done one |
| 115 | + # curve at at time |
| 116 | + findrow <- function(fit, times, extend) { |
| 117 | + if (!extend) { |
| 118 | + maxtime <- max(fit$time) |
| 119 | + times <- times[times <= maxtime] |
| 120 | + } |
| 121 | + ntime <- length(fit$time) |
| 122 | + if (ntime ==0) { |
| 123 | + if (data.frame) return(list(time = times)) |
| 124 | + else stop("no points selected for one or more curves,", |
| 125 | + " data error (?) or consider using the extend argument") |
| 126 | + } |
| 127 | + |
| 128 | + index1 <- findInterval(times, fit$time) |
| 129 | + index2 <- 1 + findInterval(times, fit$time, left.open=TRUE) |
| 130 | + |
| 131 | + fit$time <- times |
| 132 | + for (i in c("surv", "upper", "lower", "std.err", "cumhaz", |
| 133 | + "std.chaz")) { |
| 134 | + if (!is.null(fit[[i]])) fit[[i]] <- ssub(fit[[i]], index1) |
| 135 | + } |
| 136 | + |
| 137 | + # Every observation in the data has to end with a censor or event. |
| 138 | + # So by definition the number at risk after the last observed time |
| 139 | + # value must be 0. |
| 140 | + fit$n.risk <- c(fit$n.risk, 0)[index2] |
| 141 | + |
| 142 | + for (i in c("n.event", "n.censor", "n.enter")) |
| 143 | + fit[[i]] <- delta(fit[[i]], index1) |
| 144 | + fit |
| 145 | + } |
| 146 | + |
| 147 | + if (nstrat ==1) fit <- findrow(fit, times, extend) |
| 148 | + else { |
| 149 | + ltemp <- vector("list", nstrat) |
| 150 | + if (length(dim(fit)) > 1) { |
| 151 | + for (i in 1:nstrat) |
| 152 | + ltemp[[i]] <- findrow(fit[i,], times, extend) |
| 153 | + } else { |
| 154 | + for (i in 1:nstrat) ltemp[[i]] <- findrow(fit[i], times, extend) |
| 155 | + } |
| 156 | + |
| 157 | + # now stack them: time= c(time for curve 1, time for curve 2, etc) |
| 158 | + # and so on for all components |
| 159 | + unlistsurv <- function(x, name) { |
| 160 | + temp <- lapply(x, function(x) x[[name]]) |
| 161 | + if (is.vector(temp[[1]])) unlist(temp) |
| 162 | + else if (is.matrix(temp[[1]])) do.call("rbind", temp) |
| 163 | + } |
| 164 | + |
| 165 | + # unlist all the components built by a set of calls to findrow |
| 166 | + # and remake the strata |
| 167 | + keep <- c("time", "surv", "upper", "lower", "std.err", |
| 168 | + "cumhaz", "n.risk", "n.event", "n.censor", "n.enter", |
| 169 | + "std.chaz") |
| 170 | + for (i in keep) |
| 171 | + if (!is.null(fit[[i]])) fit[[i]] <- unlistsurv(ltemp, i) |
| 172 | + fit$strata[] <- sapply(ltemp, function(x) length(x$time)) |
| 173 | + } |
| 174 | + } |
| 175 | + |
| 176 | + # finish off the output structure |
| 177 | + # A survfit object may contain std(log S) or std(S), summary always std(S) |
| 178 | + if (!is.null(fit$std.err) && fit$logse) |
| 179 | + fit$std.err <- fit$std.err * fit$surv |
| 180 | + if (scale != 1) { |
| 181 | + # fix scale in the output |
| 182 | + fit$time <- fit$time/scale |
| 183 | + } |
| 184 | + |
| 185 | + if (data.frame) { |
| 186 | + fit <- unclass(fit) # toss the survfit class |
| 187 | + indx <- match(c("time", "n.risk", "n.event", "n.censor", |
| 188 | + "surv", "cumhaz", "std.err", "std.chaz", |
| 189 | + "lower", "upper"), names(fit), nomatch=0) |
| 190 | + if (!is.null(fit$strata)) |
| 191 | + newstrat <- factor(rep(1:nstrat, fit$strata), 1:nstrat, |
| 192 | + labels= names(fit$strata)) |
| 193 | + if (is.matrix(fit$surv)) { # survfit.coxph object |
| 194 | + nc <- ncol(fit$surv) |
| 195 | + ndata <- lapply(fit[indx], function(x) { |
| 196 | + if (length(x)==0) NULL |
| 197 | + else if (is.matrix(x)) c(x) |
| 198 | + else rep(x, nc)}) |
| 199 | + ndata <- data.frame(ndata) |
| 200 | + if (!is.null(fit$strata)) |
| 201 | + ndata$strata <- rep(newstrat, nc) |
| 202 | + ndata$data <- rep(1:nc, each= length(fit$time)) |
| 203 | + } else { |
| 204 | + ndata <- data.frame(fit[indx]) |
| 205 | + if (!is.null(fit$strata)) ndata$strata <- newstrat |
| 206 | + } |
| 207 | + ndata |
| 208 | + } else { |
| 209 | + fit$table <- table |
| 210 | + if (length(rmean.endtime)>0 && !any(is.na(rmean.endtime[1]))) |
| 211 | + fit$rmean.endtime <- rmean.endtime |
| 212 | + # Expand the strata. It has used 1,2,3 for a long while |
| 213 | + if (!is.null(fit$strata)) |
| 214 | + fit$strata <- factor(rep(1:nstrat, fit$strata), 1:nstrat, |
| 215 | + labels= names(fit$strata)) |
| 216 | + class(fit) <- "summary.survfit" |
| 217 | + fit |
| 218 | + } |
| 219 | +} |
0 commit comments