## The command
## source("http://parker.ad.siu.edu/Olive/survpack.txt")
## is an easy way to get these functions into R.

bphgfit<-function(time, status, group){
# First type library(survival).
# Checks goodness of fit of the Cox proportional hazards model when time
# is a vector of times, status is 0 for censored times and 1 otherwise,
# and there is a single covariate group = 1 for group (treatment) 1 and
# 0 else.
  zd <- as.data.frame(cbind(time, status, group))
  z0 <- zd[group == 0, ]
  z1 <- zd[group == 1, ]
  zk0 <- survfit(Surv(time[group == 0], status[group == 0])~1,
    conf.type = "none")
  zk1 <- survfit(Surv(time[group == 1], status[group == 1])~1,
    conf.type = "none")
  par(mfrow = c(2, 1))
  plot(survfit(coxph(Surv(time, status) ~ group, zd), newdata = z0))
  lines(zk0, type = "p")
  title("Estimated Cox and KM survival, group 0")
  plot(survfit(coxph(Surv(time, status) ~ group, zd), newdata = z1))
  lines(zk1, type = "p")
  title("Estimated Cox and KM survival, group 1")
  par(mfrow = c(1, 1))
}

bphsim3<-function(n = 100, gam = 1, nruns = 1){
# First type library(survival).
# Gets plots for Cox proportional hazards data with one indicator
# variable as the predictor. Calls bphgfit.
  for(i in 1:nruns) {
    beta <- 1
    group <- 0 * 1:n
    group[(n/2 + 1):n] <- 1
    SP <- group * beta
    lambdai <- exp(SP)
    y <- rexp(n, rate = lambdai)
    gy <- y^(1/gam)
    cen <- rexp(n, rate = 0.1)
    cengy <- pmin(gy, cen)
    status <- as.numeric(cen >= gy)
    bphgfit(cengy, status, group)
  }
}

confreg<-function(x, g = 4, that = 1:4, alpha = 0.05){
# Makes confidence regions for theta from rows of x = Ti* from a bootstrap.
# Used for testing H_0: theta = 0 versus H_1: theta != 0.
# The prediction region method, hybrid, and Bickel and Ren regions are used.
# If g = 1, the shorth interval should work better.
# Also computes the distance for the 0 vector.
# Need g = dim(x)[2] and T = that, the g by 1 estimator of theta.
# Often that = A betahat(I_min,0).
# Note that center = Tbar* = bagging estimator and cov = S*_T.
  x <- as.matrix(x)
  that <- as.vector(that)
  n <- dim(x)[1]
  zero <- 0*(1:g)
  up <- min((1 - alpha/2), (1 - alpha + 10*alpha*g/n))
  if(alpha > 0.1) up <- min((1 - alpha + 0.05), (1 - alpha + g/n))
  qn <- up
  if(qn < 1 - alpha + 0.001) up <- 1 - alpha
  center <- apply(x, 2, mean)
  cov <- var(x)
  md2 <- mahalanobis(x, center, cov)
  # MD is the classical distance
  MD <- sqrt(md2)
  # get the prediction region method cutoff
  cuplim <- sqrt(quantile(md2, up))
  D0 <- sqrt(mahalanobis(zero, center, cov))
  # get the hybrid region statistic = Bickel and Ren statistic
  br0 <- sqrt(mahalanobis(zero, that, cov))
  # get the Bickel and Ren cutoff and test statistic
  br2 <- mahalanobis(x, that, cov)
  brlim <- sqrt(quantile(br2, up))
  list(cuplim=cuplim, brlim=brlim, br0=br0, D0=D0, MD=MD, center=center,
    cov=cov)
}
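## Example usage (not from the original file): a minimal sketch of testing
## H_0: beta = 0 with confreg, assuming x, time, status hold Cox PH data
## and that this file and library(survival) have been loaded; PHboot is
## defined later in this file. Commented out so sourcing has no side effects.
# out <- PHboot(x, time, status, B=1000)
# tem <- confreg(out$betas, g=ncol(out$betas), that=out$bhat, alpha=0.05)
# tem$D0 <= tem$cuplim  #TRUE: fail to reject H_0 with the pred. reg. method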
exppisim<-function(n=100, nruns=100, B=1000, lam=1, clam=0.1, alpha=0.05){
# Simulates the Olive et al. (2019) PIs for right censored exponential data.
# R uses Y ~ EXP(lam) with E(Y) = 1/lam. Calls mshpi.
  fullpilen <- 1:nruns
  fullpicov <- 0
  for(i in 1:nruns) {
    y <- rexp(n, rate = lam)
    yf <- rexp(1, rate = lam) #uncensored
    cen <- rexp(n, rate = clam)
    ceny <- pmin(y, cen)
    status <- as.numeric(cen >= y) #1 for uncensored, 0 for censored
    # get the MLE
    m <- sum(status) #number of uncensored cases
    mle <- m/sum(ceny)
    ydat <- rexp(B, rate=mle)
    tem <- mshpi(yf=yf, ydat=ydat, n, d=1, alph=alpha)
    fullpilen[i] <- tem$up - tem$low
    fullpicov <- fullpicov + tem$inr
  }
  fullpimnlen <- mean(fullpilen)
  fullpicov <- fullpicov/nruns
  list(lam=lam, mle=mle, fullpicov=fullpicov, fullpimnlen=fullpimnlen)
}

kmsim2<-function(n = 100, runs = 100){
# Simulation of classical, log, log-log and plus four 95% confidence
# intervals for the Kaplan-Meier estimator.
# library(survival)
  ccov <- 0 * 1:n
  lcov <- ccov
  llcov <- ccov
  p4cov <- ccov
  clen <- ccov
  llen <- ccov
  lllen <- ccov
  p4len <- ccov
  np3 <- n + 3
  np4 <- n + 4
  for(i in 1:runs) {
    y <- rexp(n)
    z <- rexp(n, rate = 0.1)
    # get censored data t
    t <- pmin(y, z)
    delta <- as.numeric(z >= y)
    out <- survfit(Surv(t, delta)~1, conf.type = "plain")
    # Get S(t), then see if S(t_i) is in the CI for S(t_i) for i = 1 to n.
    st <- exp( - out$time)
    ccov <- ccov + as.integer(st > out$low & st < out$up)
    clen <- clen + (out$up - out$low)
    out <- survfit(Surv(t, delta)~1)
    lcov <- lcov + as.integer(st > out$low & st < out$up)
    llen <- llen + (out$up - out$low)
    out <- survfit(Surv(t, delta)~1, conf.type = "log-log")
    llcov <- llcov + as.integer(st > out$low & st < out$up)
    lllen <- lllen + (out$up - out$low)
    # get the plus 4 interval
    tmin <- min(t)
    tmax <- max(t)
    tp4 <- c(tmin/3, tmin/2, t, tmax + 1, tmax + 2)
    dp4 <- c(1, 1, delta, 1, 1)
    out <- survfit(Surv(tp4, dp4)~1, conf.type = "plain")
    outl <- out$low[ - c(1, 2, np3, np4)]
    outu <- out$up[ - c(1, 2, np3, np4)]
    p4cov <- p4cov + as.integer(st > outl & st < outu)
    p4len <- p4len + (outu - outl)
  }
  ccov <- ccov/runs
  lcov <- lcov/runs
  llcov <- llcov/runs
  p4cov <- p4cov/runs
  clen <- (sqrt(n) * clen)/runs
  llen <- (sqrt(n) * llen)/runs
  lllen <- (sqrt(n) * lllen)/runs
  p4len <- (sqrt(n) * p4len)/runs
  list(ccov = ccov, lcov = lcov, llcov = llcov, p4cov = p4cov, clen = clen,
    llen = llen, lllen = lllen, p4len = p4len)
}

LPHboot<-function(x, time, status, B=1000){
# Needs library(glmnet); n > 5p, p > 2, want B >= 50p.
# Bootstraps the Cox regression lasso; takes a few minutes.
  x <- as.matrix(x)
  n <- length(time)
  p <- dim(x)[2]
  y <- cbind(time, status)
  outlasso <- cv.glmnet(x, y, family="cox")
  lam <- outlasso$lambda.min
  betahat <- as.vector(predict(outlasso, type="coefficients", s=lam))
  betas <- matrix(0, nrow=B, ncol=p)
  for(i in 1:B){
    samp <- sample(1:n, replace=TRUE)
    tx <- x[samp,]
    ty <- y[samp,]
    temp <- cv.glmnet(tx, ty, family="cox")
    lam <- temp$lambda.min
    betas[i,] <- as.vector(predict(temp, type="coefficients", s=lam))
  }
  list(bhatimin0=betahat, betas=betas)
}
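## Example usage (not from the original file): a sketch of bootstrapping the
## Cox lasso on simulated data, assuming library(survival), library(glmnet),
## and the phdata2 generator defined later in this file. Commented out.
# dat <- phdata2(n=200, p=5, k=2)
# out <- LPHboot(dat$x, dat$time, dat$status, B=250)  #B >= 50p
# out$bhatimin0  #lasso coefficients from the original sample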
mshpi<-function(yf=0, ydat, n, d, alph = 0.05){
# Gets the Olive et al. (2018) modified shorth PI for Yf given xf for GLMs
# and 1D regression, where the B x 1 vector ydat is the training data, e.g.
# Poisson regression has yi ind ~ Poisson(exp(ESP)) and
# d = "plug-in degrees of freedom." Can work if p > n. Yf is in the PI if
# inr = 1. Useful for GLM, GAM, lasso, and forward selection.
  ydat <- as.vector(ydat)
  B <- length(ydat)
  inr <- 0
  if (alph > 0.1) {qn <- min(1 - alph + 0.05, 1 - alph + d/n)}
  if (alph <= 0.1) {qn <- min(1 - alph/2, 1 - alph + 10*alph*d/n)}
  pn <- qn
  if(pn < 1 - alph + 0.001) qn <- 1 - alph
  cc <- ceiling(B * (qn + 1.12*sqrt(alph/B)))
  cc <- min(B, cc)
  sy <- sort(ydat)
  up <- sy[cc]
  low <- sy[1]
  olen <- up - low
  if(cc < B){
    for(j in (cc + 1):B){
      zlen <- sy[j] - sy[j - cc + 1]
      if(zlen < olen) {
        olen <- zlen
        up <- sy[j]
        low <- sy[j - cc + 1]
      }
    }
  }
  if(low <= yf && up >= yf) inr <- inr + 1
  list(low=low, up=up, inr=inr)
}

PHboot<-function(x, time, status, B = 1000){
# Needs library(survival); n > 5p, p > 2, want B >= 50p; takes a few
# seconds.
# Bootstraps the Cox PH regression full model with the nonparametric
# bootstrap.
  x <- as.matrix(x)
  n <- length(time)
  p <- dim(x)[2]
  tdata <- as.data.frame(cbind(x, time, status))
  out <- coxph(Surv(time, status) ~., data=tdata)
  bhat <- out$coef
  betas <- matrix(0, nrow=B, ncol=p)
  for(i in 1:B){
    samp <- sample(1:n, replace=TRUE)
    tdat <- tdata[samp,]
    temp <- coxph(Surv(time, status) ~., data=tdat)
    betas[i,] <- temp$coef
  }
  list(bhat=bhat, betas=betas)
}
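## Example usage (not from the original file): a sketch pairing PHboot with
## shorth3 to get a CI for one coefficient, assuming library(survival) and
## the phdata2 and shorth3 functions defined later in this file.
# dat <- phdata2(n=100, p=4, k=1)
# out <- PHboot(dat$x, dat$time, dat$status, B=200)
# shorth3(out$betas[,1])$shorth  #about 95% CI for the first coefficient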
PHbootsim<-function(n=100, p=4, k=1, nruns=100, psi=0.0, B=1000, a=1, gam=1,
  clam=0.1, alpha=0.05){
# Needs library(survival); n > 5p, p > 2, want B >= 50p.
# Bootstraps the Cox regression full model; takes a long time.
# Use 1 <= k < p so that zeroes are in the model, where k is the number of
# nonnoise variables. There are p coefficients for beta in the Weibull
# regression data; need p > 1.
# beta_A = -(1/gam, ..., 1/gam, 0, ..., 0)^T with p-k zeroes
# beta_P = (1,...,1,0,...,0)^T with k ones and p-k zeroes
# Multiply x by A: for MVN data this results in a covariance matrix with
# eigenvector c(1, ..., 1)^T corresponding to the largest eigenvalue. As psi
# gets close to 1, the data clusters about the line in the direction of
# (1, ..., 1)^T.
# cor(X_i,X_j) = [2 psi + (p-2) psi^2]/[1 + (p-1) psi^2], i not = j,
# when the correlation exists. SP ~ N(0,a^2), and a near 1 is ok.
  rho <- (2*psi + (p-2)*psi^2)/(1 + (p-1)*psi^2)
  val <- a/sqrt(k*(1 + (p-1)*psi^2) + k*(k-1)*(2*psi + (p-2)*psi^2))
  A <- matrix(psi, nrow=p, ncol=p)
  diag(A) <- 1
  beta <- 0 * 1:p
  beta[1:k] <- 1 #beta[1:0] acts like beta[1:1] = beta[1]
  pp6 <- p+6; pp5 <- p+5; pp4 <- p+4; pp3 <- p+3; pp1 <- p+1; pp2 <- p+2
  cicov <- 0*(1:pp6)
  avelen <- 0*(1:pp6)
  zero <- 0 * 1:p
  one <- as.vector(0*1:k + 1)
  for(i in 1:nruns) {
    x <- matrix(rnorm(n * p), nrow = n, ncol = p)
    x <- val * x %*% A
    SP <- x %*% beta #SP_i ~ N(0,a^2)
    lambdai <- exp(SP)
    w <- rexp(n, rate = lambdai)
    y <- w^(1/gam)
    cen <- rexp(n, rate = clam)
    time <- pmin(y, cen)
    status <- as.numeric(cen >= y)
    tdata <- as.data.frame(cbind(x, time, status))
    out <- coxph(Surv(time, status) ~., data=tdata)
    bhat <- out$coef
    betas <- matrix(0, nrow=B, ncol=p)
    for(m in 1:B){
      samp <- sample(1:n, replace=TRUE)
      tdat <- tdata[samp,]
      temp <- coxph(Surv(time, status) ~., data=tdat)
      betas[m,] <- temp$coef
    }
    for (j in 1:p){
      tem <- shorth3(betas[,j], alpha=alpha)
      if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
        cicov[j] <- cicov[j] + 1
      avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1]
    }
    # test whether the last p-k values of beta are 0
    gg <- p - k
    tstat <- bhat[(k+1):p]
    tem <- confreg(betas[,(k+1):p], g=gg, that=tstat, alpha=alpha)
    if(tem$D0 <= tem$cuplim) #pred. reg. method
      cicov[pp1] <- cicov[pp1] + 1
    avelen[pp1] <- avelen[pp1] + tem$cuplim
    if(tem$br0 <= tem$cuplim) #hybrid method
      cicov[pp2] <- cicov[pp2] + 1
    avelen[pp2] <- avelen[pp1] #same cutoff so same length
    if(tem$br0 <= tem$brlim) #Bickel and Ren method
      cicov[pp3] <- cicov[pp3] + 1
    avelen[pp3] <- avelen[pp3] + tem$brlim
    # test whether the first k values of beta are (1,...,1)
    gg <- k
    tstat <- bhat[1:k]
    tem <- confreg(betas[,1:k], g=gg, that=tstat, alpha=alpha)
    D0 <- sqrt(mahalanobis(one, tem$center, tem$cov))
    if(D0 <= tem$cuplim) #pred. reg. method
      cicov[pp4] <- cicov[pp4] + 1
    avelen[pp4] <- avelen[pp4] + tem$cuplim
    D1 <- sqrt(mahalanobis(one, tstat, tem$cov))
    if(D1 <= tem$cuplim) #hybrid method
      cicov[pp5] <- cicov[pp5] + 1
    avelen[pp5] <- avelen[pp4] #same cutoff so same length
    if(D1 <= tem$brlim) #Bickel and Ren method
      cicov[pp6] <- cicov[pp6] + 1
    avelen[pp6] <- avelen[pp6] + tem$brlim
  }
  cicov <- cicov/nruns
  avelen <- avelen/nruns
  list(cicov=cicov, avelen=avelen, beta=beta, k=k)
}

phdata<-function(n = 100, q = 3, gam = 1, clam = 0.1){
# Generates data for Cox proportional hazards regression.
# Follows Zhou (2001) to get Weibull regression data with g(Y) = Y^(1/k)
# and h_(g(Y))(t) = k t^(k-1) exp(SP).
# Workstation: need to activate a graphics device with the command
# "X11()" or "motif()".
  beta <- 1 + 0 * 1:q
  x <- matrix(rnorm(n * q), nrow = n, ncol = q)/2
  SP <- x %*% beta
  lambdai <- exp(SP)
  y <- rexp(n, rate = lambdai)
  gy <- y^(1/gam)
  cen <- rexp(n, rate = clam)
  cengy <- pmin(gy, cen)
  status <- as.numeric(cen >= gy)
  ## change the formula for coxph with q
  #out <- coxph(Surv(cengy, status) ~ x[, 1] + x[, 2] + x[, 3])
  #ESP <- x %*% out$coef
  list(x = x, time = cengy, status = status)
}

phdata2<-function(n=100, p=4, k=1, psi=0.0, a=1, gam=1, clam=0.1){
# Use 1 <= k <= p, where k is the number of nonnoise variables.
# There are p coefficients for beta in the Weibull regression data; need
# p > 1.
# beta_A = -(1/gam, ..., 1/gam, 0, ..., 0)^T with p-k zeroes
# beta_P = (1,...,1,0,...,0)^T with k ones and p-k zeroes
# Multiply x by A: for MVN data this results in a covariance matrix with
# eigenvector c(1, ..., 1)^T corresponding to the largest eigenvalue. As psi
# gets close to 1, the data clusters about the line in the direction of
# (1, ..., 1)^T.
# cor(X_i,X_j) = [2 psi + (p-2) psi^2]/[1 + (p-1) psi^2], i not = j,
# when the correlation exists. SP ~ N(0,a^2), and a near 1 is ok.
#set.seed(974) ##need p>2
  rho <- (2*psi + (p-2)*psi^2)/(1 + (p-1)*psi^2)
  val <- a/sqrt(k*(1 + (p-1)*psi^2) + k*(k-1)*(2*psi + (p-2)*psi^2))
  A <- matrix(psi, nrow=p, ncol=p)
  diag(A) <- 1
  b <- 0 * 1:p
  b[1:k] <- 1 #b[1:0] acts like b[1:1] = b[1]
  x <- matrix(rnorm(n * p), nrow = n, ncol = p)
  x <- val * x %*% A
  SP <- x %*% b #SP_i ~ N(0,a^2)
  lambdai <- exp(SP)
  w <- rexp(n, rate = lambdai)
  y <- w^(1/gam)
  cen <- rexp(n, rate = clam)
  ceny <- pmin(y, cen)
  status <- as.numeric(cen >= y)
  tdata <- as.data.frame(cbind(x, y, status))
  #out <- coxph(Surv(y, status) ~ ., data=tdata)
  #ESP <- x %*% out$coef
  list(betaP=b, x = x, time = ceny, status = status)
}
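## Example usage (not from the original file): generating one Weibull PH
## data set and fitting the full Cox model, assuming library(survival).
## Commented out.
# dat <- phdata2(n=100, p=4, k=1, psi=0, a=1, gam=1, clam=0.1)
# tdat <- as.data.frame(cbind(dat$x, time=dat$time, status=dat$status))
# coxph(Surv(time, status) ~ ., data=tdat)$coef  #should be near dat$betaP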
phgfit2<-function(x, time, status, slices = 9){
# Checks goodness of fit of the Cox PH model; time is a vector of times.
# KM curves should have the same shape if the PH model is valid.
# status is 0 for censored times and 1 otherwise.
# In R type library(survival)
# tem <- data.frame(time,status,x)
  tem <- data.frame(cbind(time, status, x))
  out <- coxph(Surv(time, status) ~ ., tem)
  ESP <- x %*% out$coef
  indx <- sort.list(ESP)
  n <- length(time)
  val <- as.integer(n/slices)
  if(slices <= 4) par(mfrow = c(2, 2))
  if((slices == 5) || (slices == 6)) par(mfrow = c(2, 3))
  if(slices >= 7) par(mfrow = c(3, 3))
  for(i in 1:slices){
    lo <- (i-1)*val + 1
    hi <- i*val
    if(i == slices) hi <- n
    tt <- time[indx[lo:hi]]
    ts <- status[indx[lo:hi]]
    zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
    plot(zksi)
  }
  par(mfrow=c(1,1))
}

phsim<-function(n = 100, q = 3, gam = 1, nruns = 1){
# Simulates censored response plots for Cox PH data.
# Calls phdata and seyp.
  for(i in 1:nruns) {
    out <- phdata(n, q, gam)
    seyp(out$x, out$time, out$status)
  }
}

phsim2 <- function(n = 100, p = 3, gam = 1, slices = 9, nruns = 1){
# Simulates KM curves in slices.
# KM curves should have the same shape if the PH model is valid.
# Calls phdata and phgfit2.
  for(i in 1:nruns) {
    out <- phdata(n, p, gam)
    phgfit2(out$x, out$time, out$status, slices)
  }
}

phsim5 <- function(n = 99, gam = 1, slices = 9){
# Simulates sliced survival plots for Cox PH regression with pointwise CI
# bands. Follows Zhou (2001) to get Weibull regression data with
# g(Y) = Y^(1/k) and h_(g(Y))(t) = k t^(k-1) exp(SP).
# In R, type library(survival)
  p <- 3
  ## change the formula with p
  beta <- 1 + 0 * 1:p
  x <- matrix(rnorm(n * p), nrow = n, ncol = p)/2
  SP <- x %*% beta
  lambdai <- exp(SP)
  y <- rexp(n, rate = lambdai)
  gy <- y^(1/gam)
  cen <- rexp(n, rate = 0.1)
  time <- pmin(gy, cen)
  status <- as.numeric(cen >= gy)
  ## change the formula for coxph with p
  x1 <- x[,1]; x2 <- x[,2]; x3 <- x[,3]
  xdata <- data.frame(cbind(time, status, x1, x2, x3))
  out <- coxph(Surv(time, status) ~ x1 + x2 + x3, data=xdata)
  ESP <- x %*% out$coef
  indx <- sort.list(ESP)
  val <- as.integer(n/slices)
  if(slices <= 4) par(mfrow = c(2, 2))
  if((slices == 5) || (slices == 6)) par(mfrow = c(2, 3))
  if(slices >= 7) par(mfrow = c(3, 3))
  for(i in 1:slices){
    lo <- (i-1)*val + 1
    hi <- i*val
    if(i == slices) hi <- n
    tt <- time[indx[lo:hi]]
    ts <- status[indx[lo:hi]]
    zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
    ## change the model formula for nv and plot with p
    midpt <- lo + floor((hi-lo)/2)
    nvt <- c(x[indx[midpt],1], x[indx[midpt],2], x[indx[midpt],3])
    nv <- data.frame(time=1, status=1, x1=nvt[1], x2=nvt[2], x3=nvt[3])
    plot(survfit(coxph(Surv(time, status) ~ x1 + x2 + x3, data=xdata),
      newdata = nv))
    lines(zksi, type="p")
  }
  par(mfrow=c(1,1))
  list(coef = out$coef)
}
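## Example usage (not from the original file): one way to view the slice
## survival plots, assuming library(survival). Commented out.
# phsim5(n=99, gam=1, slices=9)  #KM circles should track the Cox bands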
predsim<-function(n = 100, p = 4, nruns = 100, xtype = 1, dd = 1,
  eps = 0.25, alpha = 0.1){
# MAY NOT WORK IF p = 1.
# Gets coverages of the nonparametric, semiparametric and parametric MVN
# prediction regions.
# Multiply x by A where xtype = 1 for MVN Np(0,I),
# 2, 3, 4 and 10 (with delta = eps) for delta Np(0,I) + (1-delta) Np(0, 25 I),
# 5 for lognormal,
# 6, 7, 8 and 9 for multivariate t_d with d = 3, 5, 19 or dd.
# mahalanobis gives squared Mahalanobis distances.
# Needs the covrmvn estimator (not defined in this section).
# set.seed(974)
  ccvr <- 0
  scvr <- 0
  rcvr <- 0
  volc <- 1:nruns
  vols <- volc
  volr <- volc
  #up <- 1 - alpha
  up <- min((1 - alpha/2), (1 - alpha + 10*alpha*p/n))
  if(alpha > 0.1) up <- min((1 - alpha + 0.05), (1 - alpha + p/n))
  qn <- up
  if(qn < 1 - alpha + 0.001) up <- 1 - alpha
  A <- sqrt(diag(1:p))
  m <- n + 1
  for(i in 1:nruns) {
    # make the data
    x <- matrix(rnorm(m * p), nrow = m, ncol = p)
    if(xtype == 2) {
      zu <- runif(m)
      x[zu < 0.4, ] <- x[zu < 0.4, ] * 5
    }
    if(xtype == 3) {
      zu <- runif(m)
      x[zu < 0.6, ] <- x[zu < 0.6, ] * 5
    }
    if(xtype == 4) {
      zu <- runif(m)
      x[zu < 0.1, ] <- x[zu < 0.1, ] * 5
    }
    if(xtype == 5) x <- exp(x)
    if(xtype == 6) {
      zu <- sqrt(rchisq(m, 3)/3)
      x <- x/zu
    }
    if(xtype == 7) {
      zu <- sqrt(rchisq(m, 5)/5)
      x <- x/zu
    }
    if(xtype == 8) {
      zu <- sqrt(rchisq(m, 19)/19)
      x <- x/zu
    }
    if(xtype == 9) {
      zu <- sqrt(rchisq(m, dd)/dd)
      x <- x/zu
    }
    if(xtype == 10) {
      zu <- runif(m)
      x[zu < eps, ] <- x[zu < eps, ] * 5
    }
    x <- x %*% A
    xx <- x[ - m, ]
    center <- apply(xx, 2, mean)
    cov <- var(xx)
    md2 <- mahalanobis(xx, center, cov)
    hsq <- quantile(md2, up)
    if(mahalanobis(t(x[m, ]), center, cov) <= hsq) ccvr <- ccvr + 1
    volc[i] <- sqrt(hsq)^p * prod(diag(chol(cov)))
    out <- covrmvn(xx)
    center <- out$center
    cov <- out$cov
    md2 <- mahalanobis(xx, center, cov)
    hsq <- quantile(md2, up)
    dsq <- mahalanobis(t(x[m, ]), center, cov)
    if(dsq <= hsq) scvr <- scvr + 1
    sqrtdet <- prod(diag(chol(cov)))
    vols[i] <- sqrt(hsq)^p * sqrtdet
    hsq <- qchisq(up, p)
    if(dsq <= hsq) rcvr <- rcvr + 1
    volr[i] <- sqrt(hsq)^p * sqrtdet
  }
  ccvr <- ccvr/nruns
  scvr <- scvr/nruns
  rcvr <- rcvr/nruns
  # get a measure of efficiency wrt vols, so eff(vols) = 1
  vols <- mean(vols)
  volc <- mean(volc)/vols
  volr <- mean(volr)/vols
  vols <- 1
  list(ncvr = ccvr, scvr = scvr, mcvr = rcvr, voln = volc, vols = vols,
    volm = volr, up = up)
}

RLPHboot<-function(x, time, status, B=1000){
# Needs library(glmnet) and library(survival); n > 5p, p > 2, want
# B >= 50p.
# Bootstraps the Cox regression relaxed lasso; takes a few minutes.
  x <- as.matrix(x)
  n <- length(time)
  p <- dim(x)[2]
  vars <- 1:p
  y <- cbind(time, status)
  outlasso <- cv.glmnet(x, y, family="cox")
  lam <- outlasso$lambda.min
  lcoef <- as.vector(predict(outlasso, type="coefficients", s=lam))
  vin <- vars[lcoef!=0]
  tx <- x[,vin]
  tdat <- as.data.frame(cbind(tx, time, status))
  sub <- coxph(Surv(time, status) ~., data=tdat)
  bhatimin0 <- 0 * 1:p
  bhatimin0[vin] <- sub$coef
  betas <- matrix(0, nrow=B, ncol=p)
  for(i in 1:B){
    samp <- sample(1:n, replace=TRUE)
    tx <- x[samp,]
    ty <- y[samp,]
    ttime <- time[samp]
    tstatus <- status[samp]
    temp <- cv.glmnet(tx, ty, family="cox")
    lam <- temp$lambda.min
    lcoef <- as.vector(predict(temp, type="coefficients", s=lam))
    vin <- vars[lcoef!=0]
    tx <- tx[,vin]
    tdat <- as.data.frame(cbind(tx, ttime, tstatus))
    sub <- coxph(Surv(ttime, tstatus) ~., data=tdat)
    betas[i,vin] <- sub$coef
  }
  list(bhatimin0=bhatimin0, betas=betas)
}
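## Example usage (not from the original file): a sketch of the relaxed lasso
## bootstrap on simulated data, assuming library(survival), library(glmnet),
## and phdata2. Commented out.
# dat <- phdata2(n=200, p=5, k=2)
# out <- RLPHboot(dat$x, dat$time, dat$status, B=250)
# apply(out$betas, 2, mean)  #bagged relaxed lasso coefficients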
RLPHboot2<-function(x, time, status, c=0.01, aug=F, B=1000){
# Needs library(glmnet) and library(survival); n > 5p, p > 2, want
# B >= 50p.
# Bootstraps the Cox regression relaxed lasso using bhatVS and bhatMIX;
# takes a few minutes.
# If aug is not F, adds cB full model bootstrap samples so that S*_T is
# better conditioned.
  x <- as.matrix(x)
  n <- length(time)
  p <- dim(x)[2]
  vars <- 1:p
  d <- ceiling(c*B)
  bpd <- B + d
  bp1 <- B + 1
  y <- cbind(time, status)
  tdata <- as.data.frame(cbind(x, time, status))
  outlasso <- cv.glmnet(x, y, family="cox")
  lam <- outlasso$lambda.min
  lcoef <- as.vector(predict(outlasso, type="coefficients", s=lam))
  vin <- vars[lcoef!=0]
  tx <- x[,vin]
  tdat <- as.data.frame(cbind(tx, time, status))
  sub <- coxph(Surv(time, status) ~., data=tdat)
  bhatimin0 <- 0 * 1:p
  bhatimin0[vin] <- sub$coef
  betas <- matrix(0, nrow=bpd, ncol=p)
  btmix <- betas
  for(i in 1:B){
    samp <- sample(1:n, replace=TRUE)
    tx <- x[samp,]
    ty <- y[samp,]
    ttime <- time[samp]
    tstatus <- status[samp]
    temp <- cv.glmnet(tx, ty, family="cox")
    lam <- temp$lambda.min
    lcoef <- as.vector(predict(temp, type="coefficients", s=lam))
    vin <- vars[lcoef!=0]
    tx <- tx[,vin]
    tdat <- as.data.frame(cbind(tx, ttime, tstatus))
    sub <- coxph(Surv(ttime, tstatus) ~., data=tdat)
    betas[i,vin] <- sub$coef
    # get betahatMIX*
    samp <- sample(1:n, replace=TRUE)
    tx <- x[samp,]
    ty <- y[samp,]
    ttime <- time[samp]
    tstatus <- status[samp]
    tx <- tx[,vin]
    tdat <- as.data.frame(cbind(tx, ttime, tstatus))
    sub <- coxph(Surv(ttime, tstatus) ~., data=tdat)
    btmix[i,vin] <- sub$coef
  }
  if(aug == F) {betas <- betas[1:B,]; btmix <- btmix[1:B,]}
  else {
    for(i in bp1:bpd){
      samp <- sample(1:n, replace=TRUE)
      tdat <- tdata[samp,]
      temp <- coxph(Surv(time, status) ~., data=tdat)
      betas[i,] <- temp$coef
    }
    btmix[bp1:bpd,] <- betas[bp1:bpd,]
  }
  list(bhatimin0=bhatimin0, betas=betas, btmix=btmix)
}
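## Example usage (not from the original file): as above, but also returning
## the bhatMIX bootstrap sample, with ceiling(cB) augmented full model draws
## when aug is not F. Assumes library(survival), library(glmnet), phdata2.
# dat <- phdata2(n=200, p=5, k=2)
# out <- RLPHboot2(dat$x, dat$time, dat$status, c=0.01, aug=T, B=250)
# dim(out$betas)  #253 by 5: B + ceiling(cB) rows when aug is not F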
RLPHbootsim<-function(n=100, p=4, k=1, nruns=100, psi=0.0, B=1000, a=1,
  gam=1, clam=0.1, alpha=0.05){
# Needs library(glmnet) and library(survival); n > 5p, p > 2, want
# B >= 50p.
# Bootstraps the Cox regression lasso variable selection; takes a long
# time.
# Use 1 <= k < p so that zeroes are in the model; k is the number of
# nonnoise variables. There are p coefficients for beta in the Weibull
# regression data; need p > 1.
# beta_A = -(1/gam, ..., 1/gam, 0, ..., 0)^T with p-k zeroes
# beta_P = (1,...,1,0,...,0)^T with k ones and p-k zeroes
# Multiply x by A: for MVN data this results in a covariance matrix with
# eigenvector c(1, ..., 1)^T corresponding to the largest eigenvalue. As psi
# gets close to 1, the data clusters about the line in the direction of
# (1, ..., 1)^T.
# cor(X_i,X_j) = [2 psi + (p-2) psi^2]/[1 + (p-1) psi^2], i not = j,
# when the correlation exists. SP ~ N(0,a^2), and a near 1 is ok.
  rho <- (2*psi + (p-2)*psi^2)/(1 + (p-1)*psi^2)
  val <- a/sqrt(k*(1 + (p-1)*psi^2) + k*(k-1)*(2*psi + (p-2)*psi^2))
  A <- matrix(psi, nrow=p, ncol=p)
  diag(A) <- 1
  beta <- 0 * 1:p
  beta[1:k] <- 1 #beta[1:0] acts like beta[1:1] = beta[1]
  vars <- 1:p
  pp6 <- p+6; pp5 <- p+5; pp4 <- p+4; pp3 <- p+3; pp1 <- p+1; pp2 <- p+2
  cicov <- 0*(1:pp6)
  avelen <- 0*(1:pp6)
  zero <- 0 * 1:p
  one <- as.vector(0*1:k + 1)
  dd <- 1:nruns
  for(i in 1:nruns) {
    x <- matrix(rnorm(n * p), nrow = n, ncol = p)
    x <- val * x %*% A
    SP <- x %*% beta #SP_i ~ N(0,a^2)
    lambdai <- exp(SP)
    w <- rexp(n, rate = lambdai)
    y <- w^(1/gam)
    cen <- rexp(n, rate = clam)
    time <- pmin(y, cen)
    status <- as.numeric(cen >= y)
    tdata <- as.data.frame(cbind(x, time, status))
    y <- cbind(time, status)
    outlasso <- cv.glmnet(x, y, family="cox")
    lam <- outlasso$lambda.min
    lcoef <- as.vector(predict(outlasso, type="coefficients", s=lam))
    vin <- vars[lcoef!=0]
    tx <- x[,vin]
    tdat <- as.data.frame(cbind(tx, time, status))
    sub <- coxph(Surv(time, status) ~., data=tdat)
    bhat <- zero
    bhat[vin] <- sub$coef
    dd[i] <- length(sub$coef)
    betas <- matrix(0, nrow=B, ncol=p)
    # nonparametric bootstrap
    for(m in 1:B){
      samp <- sample(1:n, replace=TRUE)
      tx <- x[samp,]
      ty <- y[samp,]
      ttime <- time[samp]
      tstatus <- status[samp]
      temp <- cv.glmnet(tx, ty, family="cox")
      lam <- temp$lambda.min
      lcoef <- as.vector(predict(temp, type="coefficients", s=lam))
      vin <- vars[lcoef!=0]
      tx <- tx[,vin]
      tdat <- as.data.frame(cbind(tx, ttime, tstatus))
      sub <- coxph(Surv(ttime, tstatus) ~., data=tdat)
      betas[m,vin] <- sub$coef
    }
    for (j in 1:p){
      tem <- shorth3(betas[,j], alpha=alpha)
      if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
        cicov[j] <- cicov[j] + 1
      avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1]
    }
    # test whether the last p-k values of beta are 0
    gg <- p - k
    tstat <- bhat[(k+1):p]
    tem <- confreg(betas[,(k+1):p], g=gg, that=tstat, alpha=alpha)
    if(tem$D0 <= tem$cuplim) #pred. reg. method
      cicov[pp1] <- cicov[pp1] + 1
    avelen[pp1] <- avelen[pp1] + tem$cuplim
    if(tem$br0 <= tem$cuplim) #hybrid method
      cicov[pp2] <- cicov[pp2] + 1
    avelen[pp2] <- avelen[pp1] #same cutoff so same length
    if(tem$br0 <= tem$brlim) #Bickel and Ren method
      cicov[pp3] <- cicov[pp3] + 1
    avelen[pp3] <- avelen[pp3] + tem$brlim
    # test whether the first k values of beta are (1,...,1)
    gg <- k
    tstat <- bhat[1:k]
    tem <- confreg(betas[,1:k], g=gg, that=tstat, alpha=alpha)
    D0 <- sqrt(mahalanobis(one, tem$center, tem$cov))
    if(D0 <= tem$cuplim) #pred. reg. method
      cicov[pp4] <- cicov[pp4] + 1
    avelen[pp4] <- avelen[pp4] + tem$cuplim
    D1 <- sqrt(mahalanobis(one, tstat, tem$cov))
    if(D1 <= tem$cuplim) #hybrid method
      cicov[pp5] <- cicov[pp5] + 1
    avelen[pp5] <- avelen[pp4] #same cutoff so same length
    if(D1 <= tem$brlim) #Bickel and Ren method
      cicov[pp6] <- cicov[pp6] + 1
    avelen[pp6] <- avelen[pp6] + tem$brlim
  }
  cicov <- cicov/nruns
  avelen <- avelen/nruns
  mndd <- mean(dd)
  list(mndd=mndd, cicov=cicov, avelen=avelen, beta=beta, k=k)
}
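## Example usage (not from the original file): a short test run; the full
## simulation with nruns=100 and B=1000 takes a long time. Assumes
## library(survival) and library(glmnet). Commented out.
# RLPHbootsim(n=100, p=4, k=1, nruns=5, B=200)  #cicov near 0.95 in long runs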
RLPHbootsim2<-function(n=100, p=4, k=2, nruns=100, psi=0.0, BB=1000, a=1,
  gam=1, clam=0.1, cc=0.01, augm=F, alph=0.05){
# Needs library(glmnet) and library(survival); n > 5p, p > 2, want
# B >= 50p.
## Convergence problems if psi is much larger than 1/sqrt(p).
# Bootstraps the Cox regression lasso variable selection and bhatMIX;
# takes a long time.
# Use 1 <= k < p so that zeroes are in the model; k is the number of
# nonnoise variables. There are p coefficients for beta in the Weibull
# regression data; need p > 1.
# beta_A = -(1/gam, ..., 1/gam, 0, ..., 0)^T with p-k zeroes
# beta_P = (1,...,1,0,...,0)^T with k ones and p-k zeroes
# Multiply x by A: for MVN data this results in a covariance matrix with
# eigenvector c(1, ..., 1)^T corresponding to the largest eigenvalue. As psi
# gets close to 1, the data clusters about the line in the direction of
# (1, ..., 1)^T.
# cor(X_i,X_j) = [2 psi + (p-2) psi^2]/[1 + (p-1) psi^2], i not = j,
# when the correlation exists. SP ~ N(0,a^2), and a near 1 is ok.
  rho <- (2*psi + (p-2)*psi^2)/(1 + (p-1)*psi^2)
  val <- a/sqrt(k*(1 + (p-1)*psi^2) + k*(k-1)*(2*psi + (p-2)*psi^2))
  A <- matrix(psi, nrow=p, ncol=p)
  diag(A) <- 1
  beta <- 0 * 1:p
  beta[1:k] <- 1 #beta[1:0] acts like beta[1:1] = beta[1]
  vars <- 1:p
  pp6 <- p+6; pp5 <- p+5; pp4 <- p+4; pp3 <- p+3; pp1 <- p+1; pp2 <- p+2
  cicov <- 0*(1:pp6)
  avelen <- 0*(1:pp6)
  cicovmix <- cicov
  avelenmix <- avelen
  zero <- 0 * 1:p
  one <- as.vector(0*1:k + 1)
  dd <- 1:nruns
  for(i in 1:nruns) {
    x <- matrix(rnorm(n * p), nrow = n, ncol = p)
    x <- val * x %*% A
    SP <- x %*% beta #SP_i ~ N(0,a^2)
    lambdai <- exp(SP)
    w <- rexp(n, rate = lambdai)
    y <- w^(1/gam)
    cen <- rexp(n, rate = clam)
    time <- pmin(y, cen)
    status <- as.numeric(cen >= y)
    # make a Weibull PH model
    out <- RLPHboot2(x, time, status, c=cc, aug=augm, B=BB)
    # bootstrap the lasso variable selection model
    for (j in 1:p){
      tem <- shorth3(out$betas[,j], alpha=alph)
      if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
        cicov[j] <- cicov[j] + 1
      avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1]
      tem <- shorth3(out$btmix[,j], alpha=alph)
      if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
        cicovmix[j] <- cicovmix[j] + 1
      avelenmix[j] <- avelenmix[j] + tem$shorth[2] - tem$shorth[1]
    }
    # test whether the last p-k values of beta are 0
    gg <- p - k
    tstat <- out$bhatimin0[(k+1):p]
    tem <- confreg(out$betas[,(k+1):p], g=gg, that=tstat, alpha=alph)
    if(tem$D0 <= tem$cuplim) #pred. reg. method
      cicov[pp1] <- cicov[pp1] + 1
    avelen[pp1] <- avelen[pp1] + tem$cuplim
    if(tem$br0 <= tem$cuplim) #hybrid method
      cicov[pp2] <- cicov[pp2] + 1
    avelen[pp2] <- avelen[pp1] #same cutoff so same length
    if(tem$br0 <= tem$brlim) #Bickel and Ren method
      cicov[pp3] <- cicov[pp3] + 1
    avelen[pp3] <- avelen[pp3] + tem$brlim
    tem <- confreg(out$btmix[,(k+1):p], g=gg, that=tstat, alpha=alph)
    if(tem$D0 <= tem$cuplim) #pred. reg. method
      cicovmix[pp1] <- cicovmix[pp1] + 1
    avelenmix[pp1] <- avelenmix[pp1] + tem$cuplim
    if(tem$br0 <= tem$cuplim) #hybrid method
      cicovmix[pp2] <- cicovmix[pp2] + 1
    avelenmix[pp2] <- avelenmix[pp1] #same cutoff so same length
    if(tem$br0 <= tem$brlim) #Bickel and Ren method
      cicovmix[pp3] <- cicovmix[pp3] + 1
    avelenmix[pp3] <- avelenmix[pp3] + tem$brlim
    # test whether the first k values of beta are (1,...,1)
    gg <- k
    tstat <- out$bhatimin0[1:k]
    tem <- confreg(out$betas[,1:k], g=gg, that=tstat, alpha=alph)
    D0 <- sqrt(mahalanobis(one, tem$center, tem$cov))
    if(D0 <= tem$cuplim) #pred. reg. method
      cicov[pp4] <- cicov[pp4] + 1
    avelen[pp4] <- avelen[pp4] + tem$cuplim
    D1 <- sqrt(mahalanobis(one, tstat, tem$cov))
    if(D1 <= tem$cuplim) #hybrid method
      cicov[pp5] <- cicov[pp5] + 1
    avelen[pp5] <- avelen[pp4] #same cutoff so same length
    if(D1 <= tem$brlim) #Bickel and Ren method
      cicov[pp6] <- cicov[pp6] + 1
    avelen[pp6] <- avelen[pp6] + tem$brlim
    tem <- confreg(out$btmix[,1:k], g=gg, that=tstat, alpha=alph)
    D0 <- sqrt(mahalanobis(one, tem$center, tem$cov))
    if(D0 <= tem$cuplim) #pred. reg. method
      cicovmix[pp4] <- cicovmix[pp4] + 1
    avelenmix[pp4] <- avelenmix[pp4] + tem$cuplim
    D1 <- sqrt(mahalanobis(one, tstat, tem$cov))
    if(D1 <= tem$cuplim) #hybrid method
      cicovmix[pp5] <- cicovmix[pp5] + 1
    avelenmix[pp5] <- avelenmix[pp4] #same cutoff so same length
    if(D1 <= tem$brlim) #Bickel and Ren method
      cicovmix[pp6] <- cicovmix[pp6] + 1
    avelenmix[pp6] <- avelenmix[pp6] + tem$brlim
  }
  cicov <- cicov/nruns
  avelen <- avelen/nruns
  cicovmix <- cicovmix/nruns
  avelenmix <- avelenmix/nruns
  list(cicov=cicov, avelen=avelen, cicovmix=cicovmix, avelenmix=avelenmix,
    beta=beta, k=k)
}
seyp<-function(x, time, status){
# Makes the censored response plot for Cox proportional hazards regression.
  xdata <- data.frame(cbind(time, status, x))
  out <- coxph(Surv(time, status) ~ ., data=xdata)
  ESP <- x %*% out$coef
  plot(ESP, time, type = "n")
  points(ESP[status == 1], time[status == 1], pch = 3)
  points(ESP[status == 0], time[status == 0], pch = 1)
}

shorth3<-function(y, alpha = 0.05){
# Computes the shorth(c) interval [Ln,Un] for c = cc.
# shorth lists Ln and Un using Frey's correction.
  n <- length(y)
  cc <- ceiling(n * (1 - alpha + 1.12*sqrt(alpha/n)))
  cc <- min(n, cc)
  sy <- sort(y)
  rup <- sy[cc]
  rlow <- sy[1]
  olen <- rup - rlow
  if(cc < n){
    for(j in (cc + 1):n){
      zlen <- sy[j] - sy[j - cc + 1]
      if(zlen < olen) {
        olen <- zlen
        rup <- sy[j]
        rlow <- sy[j - cc + 1]
      }
    }
  }
  Ln <- rlow; Un <- rup
  list(shorth=c(Ln, Un))
}

swhat <- function(stime, bhat, nv, ghat, lhat){
# Finds the estimated survival function for Weibull regression evaluated
# at x = nv.
  r <- length(stime)
  shat <- 1:r
  for(i in 1:r){
    shat[i] <- exp(-exp(-ghat*bhat%*%nv)*lhat*stime[i]^ghat)
  }
  list(shat=shat)
}

vlung2 <- function(sexv=2){
# Uses 4 slices instead of the 9 used by vlung.
# Checks goodness of fit of the Cox PH model for the R lung data.
# For females use sexv = 2, for males use sexv = 1.
  flung <- lung[lung$sex==sexv,]
  nflung <- na.omit(flung[,c("time","status","ph.ecog","ph.karno",
    "pat.karno","wt.loss")])
  out <- coxph(Surv(time, status) ~ ph.ecog + ph.karno + pat.karno +
    wt.loss, data = nflung)
  x <- as.matrix(nflung[,c(3,4,5,6)])
  time <- as.vector(nflung[,1])
  status <- as.vector(nflung[,2])
  ESP <- x %*% out$coef
  indx <- sort.list(ESP)
  n <- dim(x)[1] #85 for the females
  val <- as.integer(n/4)
  cmar <- par("mar")
  par(mfrow = c(2, 2))
  #par(mar=c(4.0,4.0,2.0,0.5))
  for(i in 1:4){
    lo <- (i-1)*val + 1
    hi <- i*val
    if(i == 4) hi <- n
    tt <- time[indx[lo:hi]]
    ts <- status[indx[lo:hi]]
    zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
    ## change the model formula for nv with p
    midpt <- lo + floor((hi-lo)/2)
    nvt <- c(x[indx[midpt],1], x[indx[midpt],2], x[indx[midpt],3],
      x[indx[midpt],4])
    # take a typical value of time
    nv <- data.frame(time=300, status=1, ph.ecog=nvt[1], ph.karno=nvt[2],
      pat.karno=nvt[3], wt.loss=nvt[4])
    plot(survfit(coxph(Surv(time, status) ~ ph.ecog + ph.karno +
      pat.karno + wt.loss, data = nflung), newdata = nv), xlab="Time",
      ylab="Estimated S(t)")
    lines(zksi, type="p")
  }
  par(mfrow=c(1,1))
  par(mar=cmar)
}
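## Example usage (not from the original file): shorth3 on its own gives a
## short approximate 95% interval; vlung2 needs the survival package's lung
## data. Commented out.
# shorth3(rnorm(1000))$shorth  #roughly (-2, 2) for N(0,1) data
# library(survival); vlung2(sexv=2)  #slice survival plots for the females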
vnwtco <- function(){
# Visualizing the NWTCO data.
## type library(survival)
#>nwtco[1,]
# seqno instit histol stage study rel edrel age in.subcohort
#     1      2      2     1     3   0  6075  25         FALSE
  z <- coxph(Surv(edrel,rel)~histol+instit+age+stage, data=nwtco)
  summary(z)
  ccex <- par("cex")
  par(mfrow=c(2,2))
  par(cex=0.3)
  zz <- cox.zph(z)
  # adding cex=0.1 has no effect
  plot(zz, cex=0.1)
  print(zz)
  par(cex=ccex)
  # test seems to contradict the plots, which have fairly horizontal loess
  time <- as.vector(nwtco[,7])
  status <- as.vector(nwtco[,6])
  out <- coxph(Surv(time, status)~histol+instit+age+stage, data=nwtco)
  x <- as.matrix(nwtco[,c(3,2,8,4)])
  ESP <- x %*% out$coef
  indx <- sort.list(ESP)
  n <- 4028
  ## change par(mfrow=c(2,3)) with slices
  slices <- 6
  val <- as.integer(n/slices)
  cmar <- par("mar")
  ccex <- par("cex")
  par(mfrow = c(2, 3))
  par(mar=c(4.0,4.0,2.0,0.5))
  for(i in 1:slices){
    lo <- (i-1)*val + 1
    hi <- i*val
    if(i == slices) hi <- n
    tt <- time[indx[lo:hi]]
    ts <- status[indx[lo:hi]]
    zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
    ## change the model formula for nv with p
    midpt <- lo + floor((hi-lo)/2)
    nvt <- c(x[indx[midpt],1], x[indx[midpt],2], x[indx[midpt],3],
      x[indx[midpt],4])
    # use typical values for seqno, study, time, and in.subcohort
    nv <- data.frame(seqno=4089, instit=nvt[2], histol=nvt[1],
      stage=nvt[4], study=3, status=1, time=300, age=nvt[3],
      in.subcohort=TRUE)
    plot(survfit(coxph(Surv(time, status)~ histol+instit+age+stage,
      data = nwtco), newdata = nv), xlab="Time", ylab="Estimated S(t)",
      cex=0.1)
    lines(zksi, cex=0.1, type="p")
  }
  par(mfrow=c(1,1))
  par(mar=cmar)
  par(cex=ccex)
}

vovar <- function(){
#### bug in new version of R for the slice survival plot
# Uses 4 slices; calls swhat.
# Checks goodness of fit of the Cox PH and Weibull PH models for the
# ovarian cancer data.
  time <- c(156, 1040, 59, 421, 329, 769, 365, 770, 1227, 268, 475, 1129,
    464, 1206, 638, 563, 1106, 431, 855, 803, 115, 744, 477, 448, 353, 377)
  status <- c(1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0,
    1, 0, 0, 0, 1, 0)
  treat <- c(1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1,
    1, 0, 1, 1, 0, 0)
  age <- c(66, 38, 72, 53, 43, 59, 64, 57, 59, 74, 59, 53, 56, 44, 56, 55,
    44, 50, 43, 39, 74, 50, 64, 56, 63, 58)
  ovar <- cbind(time, status, treat, age)
  # paste these commands for the LCR plot
  z <- survreg(Surv(ovar[,1],ovar[,2])~ovar[,3]+ovar[,4], dist="weibull")
  AFTESP <- 11.548 - .561*ovar[,3] - .079*ovar[,4]
  logT <- log(ovar[,1])
  plot(AFTESP, logT, pch = ' ')
  points(AFTESP[ovar[,2]==0], log(ovar[ovar[,2]==0,1]), pch=1)
  points(AFTESP[ovar[,2]==1], log(ovar[ovar[,2]==1,1]), pch=3)
  abline(0,1)
  # paste these commands for the Weibull EE plot
  z <- survreg(Surv(ovar[,1],ovar[,2])~ovar[,3]+ovar[,4], dist="weibull")
  zx <- cbind(ovar[,3], ovar[,4])
  zc <- coxph(Surv(ovar[,1],ovar[,2])~ovar[,3]+ovar[,4])
  sighat <- z$scale
  ESPC <- zx %*% zc$coef
  ESPW <- -zx %*% z$coef[-1]/sighat
  plot(ESPW, ESPC)
  abline(0,1)
  title("a) Weibull")
  cor(ESPW, ESPC)
  # paste these commands for the Exponential EE plot
  ze <- survreg(Surv(ovar[,1],ovar[,2])~ovar[,3]+ovar[,4],
    dist="exponential")
  ESPE <- -zx %*% ze$coef[-1]
  plot(ESPE, ESPC)
  abline(0,1)
  title("b) Exponential")
  ########## bug in the plot below for the new version of R
  # type vovar() to produce the slice survival plot
  ghat <- 1/z$scale
  ahat <- z$coef[1]
  bhat <- z$coef[-1]
  lhat <- exp(-ahat*ghat)
  stime <- sort(time)
  x <- zx
  ESP <- ESPC
  indx <- sort.list(ESP)
  n <- 26
  val <- as.integer(n/4)
  cmar <- par("mar")
  par(mfrow = c(2, 2))
  #par(mar=c(4.0,4.0,2.0,0.5))
  for(i in 1:4){
    lo <- (i-1)*val + 1
    hi <- i*val
    if(i == 4) hi <- n
    tt <- time[indx[lo:hi]]
    ts <- status[indx[lo:hi]]
    zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
    ## change the model formula for nv with p
    midpt <- lo + floor((hi-lo)/2)
    nv <- c(x[indx[midpt],1], x[indx[midpt],2])
    plot(survfit(coxph(Surv(time, status)~ ovar[,3]+ovar[,4]),
      newdata = nv), xlab="Time", ylab="Estimated S(t)")
    sfit <- swhat(stime, bhat, nv, ghat, lhat)$shat
    points(stime, sfit, pch=3)
    lines(zksi, type="p")
  }
  par(mfrow=c(1,1))
  par(mar=cmar)
}
weyp<-function(x, time, status){
# Makes the log censored response plot for Weibull proportional hazards
# regression.
  xdata <- data.frame(cbind(time, status, x))
  out <- survreg(Surv(time, status) ~ ., data=xdata)
  ESP <- x %*% out$coef[-1] + out$coef[1]
  plot(ESP, log(time), type = "n")
  points(ESP[status == 1], log(time[status == 1]), pch = 3)
  points(ESP[status == 0], log(time[status == 0]), pch = 1)
  abline(0, 1)
  #abline(lsfit(ESP[status == 1], log(time[status == 1]))$coef)
}

wphsim <- function(n = 99, gam = 1, slices = 9){
# Generates data for Weibull PH regression using Zhou (2001).
# Makes a slice survival plot for Weibull regression.
# The AFT ESP is used instead of the PH ESP, so survival increases as the
# AFT ESP increases.
# Uses crosses for Weibull and circles for KM.
# Get Weibull regression data with g(Y) = Y^(1/k) and
# h_(g(Y))(t) = k t^(k-1) exp(SP).
# Calls the function swhat.
# type library(survival)
  p <- 3
  ## change the formula with p
  beta <- 1 + 0 * 1:p
  x <- matrix(rnorm(n * p), nrow = n, ncol = p)/2
  SP <- x %*% beta
  lambdai <- exp(SP)
  y <- rexp(n, rate = lambdai)
  gy <- y^(1/gam)
  cen <- rexp(n, rate = 0.1)
  time <- pmin(gy, cen)
  status <- as.numeric(cen >= gy)
  ## change the formula for survreg with p
  out <- survreg(Surv(time, status) ~ x[, 1] + x[, 2] + x[, 3])
  ESP <- x %*% out$coef[-1]
  ghat <- 1/out$scale
  ahat <- out$coef[1]
  bhat <- out$coef[-1]
  lhat <- exp(-ahat*ghat)
  stime <- sort(time)
  indx <- sort.list(ESP)
  val <- as.integer(n/slices)
  if(slices <= 4) par(mfrow = c(2, 2))
  if((slices == 5) || (slices == 6)) par(mfrow = c(2, 3))
  if(slices >= 7) par(mfrow = c(3, 3))
  for(i in 1:slices){
    lo <- (i-1)*val + 1
    hi <- i*val
    if(i == slices) hi <- n
    tt <- time[indx[lo:hi]]
    ts <- status[indx[lo:hi]]
    zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
    ## change the model formula for nv with p
    midpt <- lo + floor((hi-lo)/2)
    nv <- c(x[indx[midpt],1], x[indx[midpt],2], x[indx[midpt],3])
    sfit <- swhat(stime, bhat, nv, ghat, lhat)$shat
    plot(stime, sfit, pch=3)
    lines(zksi, type="p")
  }
  par(mfrow=c(1,1))
  list(coef = out$coef)
}
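## Example usage (not from the original file): a sketch comparing the
## Weibull (crosses) and KM (circles) estimated survival in each slice,
## assuming library(survival). Commented out.
# wphsim(n=99, gam=1, slices=9)$coef  #coefficients should be near 1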
wpisim<-function(n = 100, p = 4, k = 1, nruns = 100, psi = 0.0, J = 5,
  B=1000, a = 1, gam=1, clam=0.1, alpha = 0.05){
# Program can fail if n/p is small and nruns > 100.
# Use 1 <= k <= p, where k is the number of nonnoise variables.
# Simulates the Olive et al. (2019) PIs for Weibull regression.
# PIs for the full model; need library(survival).
# There are p coefficients; need p > 1,
# beta = (1, ..., 1, 0, ..., 0) with k ones and p-k zeroes.
# Multiply x by A: for MVN data this results in a covariance matrix with
# eigenvector c(1, ..., 1)^T corresponding to the largest eigenvalue. As psi
# gets close to 1, the data clusters about the line in the direction of
# (1, ..., 1)^T.
# cor(X_i,X_j) = [2 psi + (p-2) psi^2]/[1 + (p-1) psi^2], i not = j,
# when the correlation exists. SP ~ N(0,a^2), and a near 1 is ok.
#set.seed(974) ##need p>2
  fullpilen <- 1:nruns
  fullpicov <- 0
  rho <- (2*psi + (p-2)*psi^2)/(1 + (p-1)*psi^2)
  val <- a/sqrt(k*(1 + (p-1)*psi^2) + k*(k-1)*(2*psi + (p-2)*psi^2))
  A <- matrix(psi, nrow=p, ncol=p)
  diag(A) <- 1
  b <- 0 * 1:p
  b[1:k] <- 1 #b[1:0] acts like b[1:1] = b[1]
  for(i in 1:nruns) {
    x <- matrix(rnorm(n * p), nrow = n, ncol = p)
    x <- val * x %*% A
    xf <- val * rnorm(p) %*% A
    SP <- x %*% b #SP_i ~ N(0,a^2)
    lambdai <- exp(SP)
    lambdaf <- exp(xf %*% b)
    w <- rexp(n, rate = lambdai)
    wf <- rexp(1, rate = lambdaf)
    y <- w^(1/gam)
    yf <- wf^(1/gam) #uncensored
    cen <- rexp(n, rate = clam)
    ceny <- pmin(y, cen)
    status <- as.numeric(cen >= y)
    statusf <- c(status, 1)
    tdata <- as.data.frame(cbind(x, ceny, status))
    cenyn <- c(y, yf)
    xn <- rbind(x, xf)
    tdat <- as.data.frame(cbind(xn, cenyn, statusf))
    # make a WR data set
    # get the full model WR PI
    if(n >= 5*p){
      outw <- survreg(Surv(ceny, status) ~ ., data = tdata)
      int <- outw$coef[1]
      bhat <- outw$coef[-1]
      sig <- outw$scale
      ghat <- 1/sig
      espw <- -xf %*% bhat/sig
      lamxf <- exp(-int/sig)*exp(espw)
      sc <- 1/lamxf^(1/ghat)
      ydat <- rweibull(B, shape=ghat, scale=sc)
      #tem <- shpi(yf=yf,ydat=ydat,alph=alpha)
      tem <- mshpi(yf=yf, ydat=ydat, n, d=p, alph=alpha)
      fullpilen[i] <- tem$up - tem$low
      fullpicov <- fullpicov + tem$inr
    }
  }
  fullpimnlen <- mean(fullpilen)
  fullpicov <- fullpicov/nruns
  list(int=int, beta=b, fullpicov=fullpicov, fullpimnlen=fullpimnlen)
}

wregsim<-function(n = 100, q = 3, gam = 1, nruns = 1){
# Simulates log censored response plots for Weibull PH data.
# Calls phdata and weyp.
  for(i in 1:nruns) {
    out <- phdata(n, q, gam)
    weyp(out$x, out$time, out$status)
  }
}

wregsim2<-function(n = 100, p=3, gam = 1, nruns = 1){
# Simulates EE plots for Weibull proportional hazards AFT data.
# Calls phdata.
# R users need to type library(survival).
  corr <- 1:nruns
  for(i in 1:nruns) {
    out <- phdata(n, p, gam)
    x <- out$x
    time <- out$time
    status <- out$status
    dat <- data.frame(cbind(time, status, x))
    outp <- coxph(Surv(time, status) ~ ., data=dat)
    outw <- survreg(Surv(time, status) ~ ., data = dat)
    shat <- outw$scale
    ESPC <- x %*% outp$coef
    ESPW <- -(x %*% outw$coef[-1])/shat
    plot(ESPW, ESPC)
    corr[i] <- cor(ESPW, ESPC)
    abline(0, 1)
  }
  mncorr <- mean(corr)
  list(mncorr=mncorr)
}

wregsim3<-function(n = 100, p=3, gam = 1, lam = 0.01, nruns = 100){
# See if alphahat and betahat are consistent estimators.
# Calls phdata; lam is passed to phdata as the censoring rate clam.
# Want mnint = 0, mncoef[i] = -1/gam, mnscale = 1/gam.
  wout <- matrix(0, nrow = nruns, ncol = (p+2))
  for(i in 1:nruns) {
    out <- phdata(n, p, gam, lam)
    x <- out$x
    time <- out$time
    status <- out$status
    dat <- data.frame(cbind(time, status, x))
    outw <- survreg(Surv(time, status) ~ ., data = dat)
    wout[i, 1:(p+1)] <- outw$coef
    wout[i, (p+2)] <- outw$scale
  }
  wmn <- apply(wout, 2, mean)
  mnint <- wmn[1]
  mncoef <- wmn[2:(p+1)]
  mnscale <- wmn[(p+2)]
  list(mnint = mnint, mncoef = mncoef, mnscale = mnscale)
}
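## Example usage (not from the original file): checking consistency of the
## Weibull AFT estimators, assuming library(survival). Commented out.
# wregsim3(n=100, p=3, gam=1, lam=0.01, nruns=100)  #mncoef near -1/gam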
wregsim4<-function(n=100, p=4, k=1, psi=0, a=1, gam=1, clam=0.01,
  nruns = 100){
# See if alphahat and betahat are consistent estimators for the WAFT model.
# Calls phdata2. beta_A = -(1/gam,...,1/gam,0,...,0)^T with p-k zeroes.
# Want mnint = 0, mncoef = beta_A, mnscale = 1/gam.
  wout <- matrix(0, nrow = nruns, ncol = (p+2))
  for(i in 1:nruns) {
    out <- phdata2(n, p, k, psi, a, gam, clam)
    x <- out$x
    time <- out$time
    status <- out$status
    dat <- data.frame(cbind(time, status, x))
    outw <- survreg(Surv(time, status) ~ ., data = dat)
    wout[i, 1:(p+1)] <- outw$coef
    wout[i, (p+2)] <- outw$scale
  }
  wmn <- apply(wout, 2, mean)
  mnint <- wmn[1]
  mncoef <- wmn[2:(p+1)]
  mnscale <- wmn[(p+2)]
  list(mnint = mnint, mncoef = mncoef, mnscale = mnscale)
}

wregsim5<-function(n=100, p=4, k=1, psi=0, a=1, gam=1, clam=0.01,
  nruns = 100){
# See if betahat is a consistent estimator for the WPH model.
# Calls phdata2. beta_A = -(1/gam,...,1/gam,0,...,0)^T with p-k zeroes;
# beta_P = (1,...,1,0,...,0)^T with k ones and p-k zeroes.
# Want mncoef[i] = beta_P.
  wout <- matrix(0, nrow = nruns, ncol = p)
  for(i in 1:nruns) {
    out <- phdata2(n, p, k, psi, a, gam, clam)
    x <- out$x
    time <- out$time
    status <- out$status
    dat <- data.frame(cbind(time, status, x))
    outw <- coxph(Surv(time, status) ~ ., data = dat)
    wout[i, 1:p] <- outw$coef
  }
  wmn <- apply(wout, 2, mean)
  list(mncoef = wmn)
}
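## Example usage (not from the original file): checking that the Cox fit
## recovers beta_P under the Weibull PH model, assuming library(survival).
## Commented out.
# wregsim5(n=100, p=4, k=1, nruns=100)$mncoef  #near (1,0,0,0) for k=1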