therneau
diff --git a/‎.gitignore
+5 b/‎.gitignore
+5
diff --git a/‎R/coxph.R
+2-2 b/‎R/coxph.R
+2-2
diff --git a/‎R/predict.coxph.R
+17-22 b/‎R/predict.coxph.R
+17-22
diff --git a/‎R/statefig.R
+1-1 b/‎R/statefig.R
+1-1
diff --git a/‎R/summary.survfit.R
+219 b/‎R/summary.survfit.R
+219
@@ -3,3 +3,8 @@
 *.so
 noweb/code.nw
 noweb/code.tex
+noweb/code.aux
+noweb/code.log
+noweb/code.out
+noweb/code.toc
+noweb/noweb.sty
@@ -276,8 +276,8 @@ coxph <- function(formula, data, weights, subset, na.action,
         pvars <- attr(Terms, 'predvars')
         pmethod <- sub("makepredictcall.", "", as.vector(methods("makepredictcall")))
         for (i in 1:ntrans) {
-            newtt <- (tt[[i]])(mf[[timetrans$vars[i]]], Y[,1], istrat, weights)
-            mf[[timetrans$vars[i]]] <- newtt
+            newtt <- (tt[[i]])(mf[[timetrans$var[i]]], Y[,1], istrat, weights)
+            mf[[timetrans$var[i]]] <- newtt
             nclass <- class(newtt)
             if (any(nclass %in% pmethod)) { # It has a makepredictcall method
                 dummy <- as.call(list(as.name(class(newtt)[1]), tcall[[i]][[2]]))
 
@@ -11,9 +11,10 @@ predict.coxph <- function(object, newdata,
     type <-match.arg(type)
     if (type=="survival") {
         survival <- TRUE
-        type <- "expected"  #this is to stop lots of "or" statements
+        type <- "expected"  # survival and expected have nearly the same code path
     }
     else survival <- FALSE
+    if (type == "expected") reference <- "sample"  # a common ref is easiest
 
     n <- object$n
     Terms <-  object$terms
@@ -179,60 +180,54 @@ predict.coxph <- function(object, newdata,
                 afit.n <- length(afit$time)
                 if (missing(newdata)) { 
                     # In this case we need se.fit, nothing else
-                    j1 <- approx(afit$time, 1:afit.n, y[indx,1], method='constant',
-                                 f=0, yleft=0, yright=afit.n)$y
+                    j1 <- findInterval(y[indx,1], afit$time)
                     chaz <- c(0, afit$cumhaz)[j1 +1]
                     varh <- c(0, cumsum(afit$varhaz))[j1 +1]
                     xbar <- rbind(0, afit$xbar)[j1+1,,drop=F]
                     if (ncol(y)==2) {
                         dt <- (chaz * x[indx,]) - xbar
                         se[indx] <- sqrt(varh + rowSums((dt %*% object$var) *dt)) *
                             risk[indx]
-                        }
+                    }
                     else {
-                        j2 <- approx(afit$time, 1:afit.n, y[indx,2], method='constant',
-                                 f=0, yleft=0, yright=afit.n)$y
-                        chaz2 <- c(0, afit$cumhaz)[j2 +1]
-                        varh2 <- c(0, cumsum(afit$varhaz))[j2 +1]
-                        xbar2 <- rbind(0, afit$xbar)[j2+1,,drop=F]
+                        j2 <- findInterval(y[indx,2], afit$time)
+                        chaz2 <- c(0, afit$cumhaz)[j2 +1L]
+                        varh2 <- c(0, cumsum(afit$varhaz))[j2 +1L]
+                        xbar2 <- rbind(0, afit$xbar)[j2+ 1L,,drop=F]
                         dt <- (chaz * x[indx,]) - xbar
                         v1 <- varh +  rowSums((dt %*% object$var) *dt)
                         dt2 <- (chaz2 * x[indx,]) - xbar2
                         v2 <- varh2 + rowSums((dt2 %*% object$var) *dt2)
                         se[indx] <- sqrt(v2-v1)* risk[indx]
-                        }
                     }
+                }
 
                 else {
                     #there is new data
                     use.x <- TRUE
                     indx2 <- which(newstrat == i)
-                    j1 <- approx(afit$time, 1:afit.n, newy[indx2,1], 
-                                 method='constant', f=0, yleft=0, yright=afit.n)$y
+                    j1 <- findInterval(newy[indx2,1], afit$time)
                     chaz <-c(0, afit$cumhaz)[j1+1]
                     pred[indx2] <- chaz * newrisk[indx2]
                     if (se.fit) {
                         varh <- c(0, cumsum(afit$varhaz))[j1+1]
                         xbar <- rbind(0, afit$xbar)[j1+1,,drop=F]
-                        }
+                    }
                     if (ncol(y)==2) {
                         if (se.fit) {
                             dt <- (chaz * newx[indx2,]) - xbar
                             se[indx2] <- sqrt(varh + rowSums((dt %*% object$var) *dt)) *
                                 newrisk[indx2]
-                            }
                         }
+                    }
                     else {
-                        j2 <- approx(afit$time, 1:afit.n, newy[indx2,2], 
-                                 method='constant', f=0, yleft=0, yright=afit.n)$y
-                                    chaz2 <- approx(-afit$time, afit$cumhaz, -newy[indx2,2],
-                                   method="constant", rule=2, f=0)$y
-                        chaz2 <-c(0, afit$cumhaz)[j2+1]
+                        j2 <- findInterval(newy[indx2,2], afit$time)
+                        chaz2 <-c(0, afit$cumhaz)[j2+1L]
                         pred[indx2] <- (chaz2 - chaz) * newrisk[indx2]
-                    
+
                         if (se.fit) {
-                            varh2 <- c(0, cumsum(afit$varhaz))[j2+1]
-                            xbar2 <- rbind(0, afit$xbar)[j2+1,,drop=F]
+                            varh2 <- c(0, cumsum(afit$varhaz))[j2 +1L]
+                            xbar2 <- rbind(0, afit$xbar)[j2 + 1L,,drop=F]
                             dt <- (chaz * newx[indx2,]) - xbar
                             dt2 <- (chaz2 * newx[indx2,]) - xbar2
 
 
@@ -45,7 +45,7 @@ statefig <- function(layout, connect, margin=.03, box=TRUE,
         cbox <- matrix(0, ncol=2, nrow=nstate)  #coordinates will be here
         n <- length(layout)
 
-        ix <- rep(seq_along(layout), layout) 
+        ix <- rep(seq(along=layout), layout) 
         if (is.vector(layout) || ncol(layout)> 1) { #left to right     
             cbox[,1] <- space(n)[ix]
             for (i in 1:n) cbox[ix==i,2] <- 1 -space(layout[i])
 
@@ -0,0 +1,219 @@
+# Summary method for survfit and survfit.coxph objects: the curve at its own time points or at the requested times, returned as a summary.survfit object or (data.frame=TRUE) as a data frame
+summary.survfit <- function(object, times, censored=FALSE, # times: optional report times; censored: keep time points that have no events
+                            scale=1, extend=FALSE, # scale: divisor applied to time; extend: keep times beyond the end of a curve
+                            rmean=getOption('survfit.rmean'), # numeric truncation point, or one of none, common, individual
+                            data.frame= FALSE, # TRUE: return a data frame instead of a summary.survfit object
+                            ...) {
+    fit <- object  # I get tired of typing "object"
+    if (!inherits(fit, 'survfit'))
+            stop("summary.survfit can only be used for survfit",
+                 " and survfit.coxph objects")
+    if (is.null(fit$logse)) fit$logse <- TRUE   #older style objects lack this
+
+    # The print.rmean option is deprecated, it is still listened
+    #   to in print.survfit, but ignored here
+    if (is.null(rmean)) rmean <- "common"  # default when the option is unset
+    if (is.numeric(rmean)) {
+        if (is.null(fit$start.time)) {
+            if (rmean < min(fit$time)) 
+                stop("Truncation point for the mean time in state is < smallest survival")
+        }
+        else if (rmean < fit$start.time)
+            stop("Truncation point for the mean time in state is < smallest survival")
+    }
+    else {
+        rmean <- match.arg(rmean, c('none', 'common', 'individual'))
+        if (length(rmean)==0) stop("Invalid value for rmean option")
+    }
+
+    fit0 <- survfit0(fit)  # presumably the curve with its starting time added; see survfit0
+    if (!data.frame) {
+        # adding time 0 makes the mean and median easier
+        temp <- survmean(fit0, scale=scale, rmean)  
+        table <- temp$matrix  #for inclusion in the output list
+        rmean.endtime <- temp$end.time
+    }
+
+    if (!is.null(fit$strata)) {
+        nstrat <-  length(fit$strata)
+    } else nstrat <- 1
+
+    # If times is present, then n.event, n.censor, and n.enter are summed
+    #  between those time points.  Utility function to do that
+    delta <- function(x, indx) {  # sums between chosen times
+        if (is.logical(indx)) indx <- which(indx)
+        if (!is.null(x) && length(indx) >0) {
+            fx <- function(x, indx) diff(c(0, c(0, cumsum(x))[indx+1])) # running total at each index, then differenced
+            if (is.matrix(x)) {
+                temp <- apply(x, 2, fx, indx=indx)
+                # don't return a vector when only 1 time point is given
+                if (is.matrix(temp)) temp else matrix(temp, nrow=1)
+            }
+            else fx(x, indx)
+        }
+        else NULL
+    }
+
+    # called for each component of the curve that has a time dimension
+    #  and is not summed
+    ssub<- function(x, indx) {  #select an object and index
+        if (is.logical(indx)) indx <- which(indx)
+        if (!is.null(x) && length(indx)>0) {
+            if (is.matrix(x)) x[pmax(1,indx),,drop=FALSE] # pmax: an index of 0 selects row 1
+            else if (is.array(x))  x[pmax(1,indx),,,drop=FALSE]
+            else x[pmax(1, indx)]
+        }
+        else NULL
+    }
+    
+    # By replacing components of fit, summary.survfit inherits several bits
+    if (missing(times)) {
+        if (!censored) {  # do not retain time points with no events
+            index <- fit$n.event >0
+            for (i in c("time","n.risk", "n.event", "surv", "std.err", 
+                                "upper", "lower", "cumhaz", "std.chaz")) {
+                if (!is.null(fit[[i]])) {  # not all components in all objects
+                    fit[[i]] <- ssub(fit[[i]], index)
+                }
+            }
+
+            # The n.enter and n.censor values are accumulated
+            #  both of these are simple vectors
+            if (is.null(fit$strata)) {
+                for (i in c("n.enter", "n.censor"))
+                    if (!is.null(fit[[i]]))
+                        fit[[i]] <- delta(fit[[i]], index)
+            }
+            else {
+                sindx <- rep(1:nstrat, fit$strata)
+                for (i in c("n.enter", "n.censor")) {
+                    if (!is.null(fit[[i]]))
+                        fit[[i]] <- unlist(sapply(1:nstrat, function(j) 
+                                     delta(fit[[i]][sindx==j], index[sindx==j])))
+                }
+                # the "factor" is needed for the case that a strata has no
+                #  events at all, only censored and hence 0 lines of output
+                # the [] retains the original names
+                fit$strata[] <- as.vector(table(factor(sindx[index], 1:nstrat))) 
+            }
+        }
+        #if missing(times) and censored=TRUE, the fit object is ok as it is
+    }
+    else {
+        if (length(times) ==0) stop("no values in times vector")
+        if (inherits(times, "Date")) times <- as.numeric(times) # allow Dates
+        if (!is.numeric(times)) stop("times must be a numeric vector")
+        if (!all(is.finite(times))) stop("times contains missing or infinite values")  
+        times <- unique(sort(times))
+        fit <- fit0  # findrow() needs the starting time
+
+        # findrow is called once per stratum
+        #   times will be the user specified times
+        #   returned is a subset of the rows for the stratum
+        # We have to deal with user specified times that are before the first
+        #  time value in the curve or after the last, which is easier done one
+        #  curve at a time
+        findrow <- function(fit, times, extend) {
+            if (!extend) {
+                maxtime <- max(fit$time)
+                times <- times[times <= maxtime]
+            }
+            ntime <- length(fit$time)  # NOTE(review): this is the curve length, not the filtered times; confirm which one the test below intends
+            if (ntime ==0) { 
+                if (data.frame) return(list(time = times))
+                else stop("no points selected for one or more curves,", 
+                     " data error (?) or consider using the extend argument")
+            }
+                            
+            index1 <- findInterval(times, fit$time) # index of the last curve time <= each requested time (0 if none)
+            index2 <- 1 + findInterval(times, fit$time, left.open=TRUE) # index of the first curve time >= each requested time
+                
+            fit$time <- times
+            for (i in c("surv", "upper", "lower", "std.err", "cumhaz",
+                        "std.chaz")) {
+                if (!is.null(fit[[i]])) fit[[i]] <- ssub(fit[[i]], index1)
+            }
+            
+            # Every observation in the data has to end with a censor or event.
+            #  So by definition the number at risk after the last observed time
+            #  value must be 0.
+            fit$n.risk <- c(fit$n.risk, 0)[index2]
+
+            for (i in c("n.event", "n.censor", "n.enter"))
+                fit[[i]] <- delta(fit[[i]], index1)
+            fit
+        }
+
+        if (nstrat ==1) fit <- findrow(fit, times, extend)
+        else {
+            ltemp <- vector("list", nstrat)
+            if (length(dim(fit)) > 1) {
+                for (i in 1:nstrat) 
+                    ltemp[[i]] <- findrow(fit[i,], times, extend)
+            } else { 
+                for (i in 1:nstrat) ltemp[[i]] <- findrow(fit[i], times, extend)
+            }
+         
+            # now stack them: time= c(time for curve 1, time for curve 2, etc)
+            #  and so on for all components
+            unlistsurv <- function(x, name) {
+                temp <- lapply(x, function(x) x[[name]])
+                if (is.vector(temp[[1]])) unlist(temp)
+                else if (is.matrix(temp[[1]])) do.call("rbind", temp) # NOTE(review): any other type falls through and yields NULL
+            }
+
+            # unlist all the components built by a set of calls to findrow
+            #  and remake the strata
+            keep <- c("time", "surv", "upper", "lower", "std.err",
+                      "cumhaz", "n.risk", "n.event", "n.censor", "n.enter",
+                      "std.chaz")
+            for (i in keep) 
+                if (!is.null(fit[[i]])) fit[[i]] <- unlistsurv(ltemp, i)
+            fit$strata[] <- sapply(ltemp, function(x) length(x$time)) # number of reported times per curve
+        }
+    }
+
+    # finish off the output structure
+    # A survfit object may contain std(log S) or std(S), summary always std(S)
+    if (!is.null(fit$std.err) && fit$logse) 
+        fit$std.err <- fit$std.err * fit$surv   
+    if (scale != 1) {
+        # fix scale in the output
+        fit$time <- fit$time/scale
+    }
+
+    if (data.frame) {
+        fit <- unclass(fit)  # toss the survfit class
+        indx <- match(c("time", "n.risk", "n.event", "n.censor", 
+                        "surv", "cumhaz", "std.err", "std.chaz",
+                        "lower", "upper"), names(fit), nomatch=0)
+        if (!is.null(fit$strata))
+            newstrat <- factor(rep(1:nstrat, fit$strata), 1:nstrat,
+                               labels= names(fit$strata))
+        if (is.matrix(fit$surv)) { # survfit.coxph object
+            nc <- ncol(fit$surv)
+            ndata <- lapply(fit[indx], function(x) {
+                                 if (length(x)==0) NULL
+                                 else if (is.matrix(x)) c(x)
+                                 else rep(x, nc)})
+            ndata <- data.frame(ndata)
+            if (!is.null(fit$strata)) 
+                ndata$strata <- rep(newstrat, nc)
+            ndata$data <- rep(1:nc, each= length(fit$time)) # column of surv that each stacked row came from
+        } else {
+            ndata <- data.frame(fit[indx])
+            if (!is.null(fit$strata)) ndata$strata <- newstrat
+        }
+        ndata
+    } else {
+        fit$table <- table
+        if (length(rmean.endtime)>0  && !any(is.na(rmean.endtime[1]))) 
+            fit$rmean.endtime <- rmean.endtime
+        # Expand the strata. It has used 1,2,3 for a long while
+        if (!is.null(fit$strata)) 
+            fit$strata <- factor(rep(1:nstrat, fit$strata), 1:nstrat,
+                                 labels= names(fit$strata))
+        class(fit) <- "summary.survfit"
+        fit
+    }
+}