##The command
##source("http://parker.ad.siu.edu/Olive/survpack.txt")
##is an easy way to get these functions into R.

BJcovxz<-function(n = 100, p = 4, k=1, psi = 0.0,  a=1, gam= 1,  clam=0.1){
#needs library(rms), library(survival), 
#Z = log(Y) = log(survival time), gets an estimate of cov(x,Z)
#using the Buckley James estimator
#Use 1 <= k < p so zeroes are in the model, k= the number of nonnoise variables
#there are p coefficients for beta in the Weibull regression data 
#need p > 1, beta_A = -(1/gam, ..., 1/gam, 0, ..., 0)^T with p-k zeroes
# beta_P = (1,...,1,0,...,0)^T with k ones and p-k zeroes
# Multiply x by A: for MVN data this results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T. 
# cor(X_i,X_j) = [2 psi +(p-2)psi^2]/[1 + (p-1)psi^2], i not = j
# when the correlation exists. SP~N(0,a^2), and a near 1 is ok. 
rho <- (2*psi + (p-2)*psi^2)/(1 + (p-1)*psi^2)
val <- a/sqrt(k*(1 + (p-1)*psi^2) + k*(k-1)*(2*psi + (p-2)*psi^2))
A <- matrix(psi,nrow=p,ncol=p)
diag(A) <- 1
beta <- 0 * 1:p
beta[1:k] <- 1 #beta[1:0] acts like beta[1:1] = beta[1]
uncen <- 0*1:n + 1 
x <- matrix(rnorm(n * p), nrow = n, ncol = p)
x <- val* x %*% A
SP <-  x%*%beta    #SP_i  ~ N(0,a^2)
lambdai <- exp(SP)               
w <- rexp(n, rate = lambdai)
y <- w^(1/gam)
z <- log(y)
cen <- rexp(n, rate = clam)
time <- pmin(y, cen)
status <- as.numeric(cen >= y)		
#make a Weibull PH data set  
         tdata <- as.data.frame(cbind(x,time,status))
if(p==3){
names(tdata) <- c("V1","V2","V3","time","status")
outbj <- bj(Surv(time,status)~V1+V2+V3,data=tdata)}
if(p==4){
names(tdata) <- c("V1","V2","V3","V4","time","status")
outbj <- bj(Surv(time,status)~V1+V2+V3+V4,data=tdata)}
if(p==5){
names(tdata) <- c("V1","V2","V3","V4","V5","time","status")
outbj <- bj(Surv(time,status)~V1+V2+V3+V4+V5,data=tdata)}
bhatbj <- coef(outbj)  #J estimate of beta_A
outw <- survreg(Surv(time, status) ~ ., data = tdata)               
bhatwaft <- outw$coef[-1] #Weibull AFT estimate of beta_A
betaA <- -beta/gam #pop beta_A
covxzhat <- cov(x,z)  #sample cov(x,z) using n uncensored response times (usually unknown)
covx <- (val^2)*A %*% t(A) #pop cov(x)
covxz <- covx %*% betaA  #pop cov(x,Z)
covxzwaft <- cov(x) %*% bhatwaft #Weibull AFT est of cov(x,z)
covxzBJ = cov(x)%*%bhatbj[-1] #BJ est of cov(x,z)
C <- cbind(covxz,covxzhat,covxzwaft,covxzBJ)   
list(k=k,betaA=betaA,bhatwaft=bhatwaft,bhatbj=bhatbj[-1],C=C)}

BJcovxzsim<-function(n = 100, p = 4, nruns=100, k=1, psi = 0.0,  a=1, gam= 1,  clam=0.1){
#needs library(rms), library(survival), 
#Z = log(Y) = log(survival time), gets an estimate of cov(x,Z)
#using the Buckley James estimator
#Use 1 <= k < p so zeroes are in the model, k= the number of nonnoise variables
#there are p coefficients for beta in the Weibull regression data 
#need p > 1, beta_A = -(1/gam, ..., 1/gam, 0, ..., 0)^T with p-k zeroes
# beta_P = (1,...,1,0,...,0)^T with k ones and p-k zeroes
# Multiply x by A: for MVN data this results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T. 
# cor(X_i,X_j) = [2 psi +(p-2)psi^2]/[1 + (p-1)psi^2], i not = j
# when the correlation exists. SP~N(0,a^2), and a near 1 is ok. 
rho <- (2*psi + (p-2)*psi^2)/(1 + (p-1)*psi^2)
val <- a/sqrt(k*(1 + (p-1)*psi^2) + k*(k-1)*(2*psi + (p-2)*psi^2))
A <- matrix(psi,nrow=p,ncol=p)
diag(A) <- 1
beta <- 0 * 1:p
beta[1:k] <- 1 #beta[1:0] acts like beta[1:1] = beta[1]
uncen <- 0*1:n + 1 
betaA <- -beta/gam #pop beta_A
covx <- (val^2)*A %*% t(A) #pop cov(x)
covxz <- covx %*% betaA  #pop cov(x,Z)
covxzhat <- matrix(0,nrow=nruns,ncol=p)
covxzwaft <- covxzhat
covxzBJ <- covxzhat
for(i in 1:nruns) {
x <- matrix(rnorm(n * p), nrow = n, ncol = p)
x <- val* x %*% A
SP <-  x%*%beta    #SP_i  ~ N(0,a^2)
lambdai <- exp(SP)               
w <- rexp(n, rate = lambdai)
y <- w^(1/gam)
z <- log(y)
cen <- rexp(n, rate = clam)
time <- pmin(y, cen)
status <- as.numeric(cen >= y)		
#make a Weibull PH data set  
         tdata <- as.data.frame(cbind(x,time,status))
if(p==3){
names(tdata) <- c("V1","V2","V3","time","status")
outbj <- bj(Surv(time,status)~V1+V2+V3,data=tdata)}
if(p==4){
names(tdata) <- c("V1","V2","V3","V4","time","status")
outbj <- bj(Surv(time,status)~V1+V2+V3+V4,data=tdata)}
if(p==5){
names(tdata) <- c("V1","V2","V3","V4","V5","time","status")
outbj <- bj(Surv(time,status)~V1+V2+V3+V4+V5,data=tdata)}
bhatbj <- coef(outbj)  #J estimate of beta_A
outw <- survreg(Surv(time, status) ~ ., data = tdata)               
bhatwaft <- outw$coef[-1] #Weibull AFT estimate of beta_A
covxzhat[i,] <- cov(x,z)  #sample cov(x,z) using n uncensored response times (usually unknown)
covxzwaft[i,] <- cov(x) %*% bhatwaft #Weibull AFT est of cov(x,z)
covxzBJ[i,] = cov(x)%*%bhatbj[-1] #BJ est of cov(x,z)
}
covxzhatmn=apply(covxzhat,2,mean)
covxzhatsd=apply(covxzhat,2,sd)
covxzwaftmn=apply(covxzwaft,2,mean)
covxzwaftsd=apply(covxzwaft,2,sd)
covxzBJmn=apply(covxzBJ,2,mean)
covxzBJsd=apply(covxzBJ,2,sd)
list(k=k,betaA=betaA,bhatwaft=bhatwaft,bhatbj=bhatbj[-1],
covxz=t(covxz),covxzhatmn=covxzhatmn,covxzhatsd=covxzhatsd,
covxzwaftmn=covxzwaftmn,covxzwaftsd=covxzwaftsd,
covxzBJmn=covxzBJmn,covxzBJsd=covxzBJsd)}


bphgfit<-function(time, status, group){
# first type library(survival). 
#
# Checks goodness of fit of Cox proportional
# hazard model if time is a vector of times, status is 0 for 
# censored times and 1 otherwise and there is a single covariate 
# group = 1 for group (treatment) 1 and 0 else.
#
	zd <- as.data.frame(cbind(time, status,
		group))
	z0 <- zd[group == 0,  ]
	z1 <- zd[group == 1,  ]
	zk0 <- survfit(Surv(time[group == 0], 
		status[group == 0])~1, conf.type
		 = "none")
	zk1 <- survfit(Surv(time[group == 1], 
		status[group == 1])~1, conf.type
		 = "none")
	par(mfrow = c(2, 1))
	plot(survfit(coxph(Surv(time, status) ~ 
		group, zd), newdata = z0))
	lines(zk0,type="p")
	title("Estimated Cox and KM survival, group 0"
		)
	plot(survfit(coxph(Surv(time, status) ~ 
		group, zd), newdata = z1))
	lines(zk1,type="p")
	title("Estimated Cox and KM survival, group 1"
		)
        par(mfrow=c(1,1))
}


bphsim3<-function(n = 100, gam = 1, nruns = 1){
#first type library(survival). 
#
# Gets plot for Cox proportional hazards data
# with 1 indicator variable as a predictor.
# calls bphgfit 
	for(i in 1:nruns) {
		beta <- 1
		group <- 0 * 1:n
		group[(n/2 + 1):n] <- 1
		SP <- group * beta
		lambdai <- exp(SP)
		y <- rexp(n, rate = lambdai)
		gy <- y^(1/gam)
		cen <- rexp(n, rate = 0.1)
		cengy <- pmin(gy, cen)
		status <- as.numeric(cen >= gy)
		bphgfit(cengy, status, group)
	}
}


confreg<-function(x, g = 4, that = 1:4, alpha = 0.05){
# Makes confidence regions for theta from rows of x = Ti* from a bootstrap.
# Use fot testing H_0: theta = 0 versus H_1: theta != 0.
# The prediction region method, hybrid, and Bickel and Ren regions are used.
# If g = 1, the shorth interval should work better.
# Also computes the distance for the 0 vector.
# Need g = dim(x)[2] and T = that the g by 1 estimator of theta.
# Often that = A betahat(I_min,0). 
# Note that center= Tbar* = bagging estimator and cov = S*_T.
x <- as.matrix(x)
that <- as.vector(that)
n <- dim(x)[1]
zero <- 0*(1:g)
up <- min((1 - alpha/2), (1 - alpha + 10*alpha*g/n))
if(alpha > 0.1)
	up <- min((1 - alpha + 0.05), (1 - alpha + g/n))
qn <- up
if(qn < 1 - alpha + 0.001)
        up <- 1 - alpha
center <- apply(x, 2, mean)
cov <- var(x)
md2 <- mahalanobis(x, center, cov)
# MD is the classical  distance
MD <- sqrt(md2)
#get prediction region method cutoff
cuplim <- sqrt(quantile(md2, up))
D0 <- sqrt(mahalanobis(zero, center, cov))
#get hybrid region statistic = Bickel and Ren statistic
br0 <- sqrt(mahalanobis(zero, that, cov))
#get the Bickel and Ren cutoff and test statistic
br2 <- mahalanobis(x,that,cov)
brlim <- sqrt(quantile(br2, up))
list(cuplim=cuplim,brlim=brlim,br0=br0,D0=D0,MD=MD,center=center,cov=cov)
}


exppisim<-function(n=100,nruns=100,B=1000,lam=1,clam=0.1,alpha=0.05){
#Simulates the  Olive et al. (2019) PIs for right censored exponential data. 
#R uses Y~EXP(lam) with E(Y) = 1/lam
#calls mshpi
fullpilen <- 1:nruns
fullpicov <- 0
  for(i in 1:nruns) {
     y <- rexp(n, rate = lam)
     yf <- rexp(1, rate = lam) #uncensored	
     cen <- rexp(n, rate = clam)
     ceny <- pmin(y, cen)
     status <- as.numeric(cen >= y) #1 for uncensored, 0 for censored
    #get the MLE
     m <- sum(status) #number of uncensored cases
     mle <- m/sum(ceny)        
     ydat <- rexp(B,rate=mle)
     tem <- mshpi(yf=yf,ydat=ydat,n,d=1,alph=alpha)
     fullpilen[i] <- tem$up - tem$low
     fullpicov <- fullpicov + tem$inr 
  } 
fullpimnlen <- mean(fullpilen)
fullpicov <- fullpicov/nruns
list(lam=lam,mle=mle,fullpicov=fullpicov,fullpimenlen=fullpimnlen)
}


kmsim2<-function(n = 100, runs = 100){
# simulation of classical, log, log-log and plus four 95% 
# confidence intervals for Kaplan Meier estimator
# library(survival)
	ccov <- 0 * 1:n
	lcov <- ccov
	llcov <- ccov
	p4cov <- ccov
	clen <- ccov
	llen <- ccov
	lllen <- ccov
	p4len <- ccov
	np3 <- n + 3
	np4 <- n + 4
	for(i in 1:runs) {
		y <- rexp(n)
		z <- rexp(n, rate = 0.1)	# get censored data t
		t <- pmin(y, z)
		delta <- as.numeric(z >= y)
		out <- survfit(Surv(t, delta)~1, conf.type = "plain")	
	#Get S(t) then see if S(t_i) is in the CI for S(t_i) for i = 1 to n
		st <- exp( - out$time)
		ccov <- ccov + as.integer(st > out$low & st < out$up)
		clen <- clen + (out$up - out$low)
		out <- survfit(Surv(t, delta)~1)
		lcov <- lcov + as.integer(st > out$low & st < out$up)
		llen <- llen + (out$up - out$low)
		out <- survfit(Surv(t, delta)~1, conf.type = "log-log")
		llcov <- llcov + as.integer(st > out$low & st < out$up)
		lllen <- lllen + (out$up - out$low)	# get plus 4 interval
		tmin <- min(t)
		tmax <- max(t)
		tp4 <- c(tmin/3, tmin/2, t, tmax + 1, tmax + 2)
		dp4 <- c(1, 1, delta, 1, 1)
		out <- survfit(Surv(tp4, dp4)~1, conf.type = "plain")
		outl <- out$low[ - c(1, 2, np3, np4)]
		outu <- out$up[ - c(1, 2, np3, np4)]
		p4cov <- p4cov + as.integer(st > outl & st < outu)
		p4len <- p4len + (outu - outl)
	}
	ccov <- ccov/runs
	lcov <- lcov/runs
	llcov <- llcov/runs
	p4cov <- p4cov/runs
	clen <- (sqrt(n) * clen)/runs
	llen <- (sqrt(n) * llen)/runs
	lllen <- (sqrt(n) * lllen)/runs
        p4len <- (sqrt(n) * p4len)/runs
list(ccov = ccov, lcov = lcov, llcov = llcov, p4cov = p4cov, clen = clen, llen = llen, lllen = lllen, p4len = p4len)
}


LPHboot<-function(x,time,status,B=1000){
#needs library(glmnet), n > 5p, p > 2, want B >= 50p, 
#bootstraps the Cox regression lasso, takes a few minutes
x <- as.matrix(x)
n <- length(time)
p <-  dim(x)[2]
y<-cbind(time,status)
outlasso<-cv.glmnet(x,y,family="cox")
lam <- outlasso$lambda.min
betahat <- as.vector(predict(outlasso,type="coefficients",s=lam)) 
betas <- matrix(0,nrow=B,ncol=p)
for(i in 1:B){
    samp <- sample(1:n, replace=TRUE)
    tdat <- tdata[samp,]
    tx <- x[samp,]
    ty <- y[samp,]
    temp<-cv.glmnet(tx,ty,family="cox")
    lam <- temp$lambda.min
    betas[i,] <- as.vector(predict(temp,type="coefficients",s=lam))
    }
list(bhatimin0=betahat,betas=betas)
}

mshpi<-function(yf=0, ydat, n, d, alph = 0.05){
#Gets the Olive et al (2018) modified shorth PI for Yf given xf for GLMs and 
#1D regression where the B x 1 vector ydat is the training data, e.g 
#Poisson regression has yi ind ~ Poisson(exp(ESP)) and 
#d = ``plug in degrees of freedom." Can work if p > n. Yf is in the PI if 
#inr = 1. Useful for GLM, GAM, lasso, and forward selection.
ydat<-as.vector(ydat)
B<-length(ydat)  
inr <- 0
if (alph > 0.1)
    {qn <- min(1 - alph + 0.05, 1 - alph + d/n)}
  if (alph <= 0.1)
    {qn <- min(1 - alph/2, 1 - alph + 10*alph*d/n)}
  pn <- qn
  if(pn < 1 - alph + 0.001)
    qn <- 1 - alph
cc <- ceiling(B * (qn + 1.12*sqrt(alph/B)))
cc <- min(B,cc)
  sy <- sort(ydat)
  up <- sy[cc]
  low <- sy[1]
  olen <- up - low
if(cc < B){
    for(j in (cc + 1):B){
	zlen <- sy[j] - sy[j - cc + 1]
	if(zlen < olen) {
	   olen <- zlen
	   up <- sy[j]
	   low <- sy[j - cc + 1]
	}
    }
} 
if(low <= yf && up >= yf)
inr <- inr + 1
list(low=low, up=up, inr = inr)}





PHboot<-function(x,time,status,B = 1000){
#needs library(survival), n > 5p, p > 2, 
#want B >= 50p, takes a few seconds
#bootstraps the Cox PH regression full model with the nonparametric bootstrap
x <- as.matrix(x)
n <- length(time)
p <- dim(x)[2]
tdata <- as.data.frame(cbind(x,time,status))
out <- coxph(Surv(time, status) ~., data=tdata)
bhat<-out$coef
betas <- matrix(0,nrow=B,ncol=p)
for(i in 1:B){
    samp <- sample(1:n, replace=TRUE)
    tdat <- tdata[samp,]
    temp <- coxph(Surv(time, status) ~., data=tdat)
    betas[i,] <- temp$coef
   }
list(bhat=bhat,betas=betas)
}



PHbootsim<-function(n=100,p=4,k=1,nruns=100,psi=0.0,B=1000,a=1,gam=1,
clam=0.1,alpha=0.05){
#needs library(survival), n > 5p, p > 2, want B >= 50p, 
#bootstraps the Cox regression full model, takes a long time
#U1 <= k < p, so zeroes are in the model, where k is the number of nonnoise variables
#there are p coefficients for beta in the Weibull regression data 
#need K < p so zeroes are in the model
#need p > 1, beta_A = -(1/gam, ..., 1/gam, 0, ..., 0)^T with p-k zeroes
# beta_P = (1,...,1,0,...,0)^T with k ones and p-k zeroes
# Multiply x by A: for MVN data this results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T. 
# cor(X_i,X_j) = [2 psi +(p-2)psi^2]/[1 + (p-1)psi^2], i not = j
# when the correlation exists. SP~N(0,a^2), and a near 1 is ok. 
rho <- (2*psi + (p-2)*psi^2)/(1 + (p-1)*psi^2)
val <- a/sqrt(k*(1 + (p-1)*psi^2) + k*(k-1)*(2*psi + (p-2)*psi^2))
A <- matrix(psi,nrow=p,ncol=p)
diag(A) <- 1
beta <- 0 * 1:p
beta[1:k] <- 1 #beta[1:0] acts like beta[1:1] = beta[1]
pp6<-p+6; pp5<-p+5; pp4<-p+4;pp3<-p+3; pp1<-p+1; pp2<-p+2
cicov <- 0*(1:pp6)
avelen <- 0*(1:pp6)
zero <- 0 * 1:p
one <- as.vector(0*1:k + 1)
for(i in 1:nruns) {
	x <- matrix(rnorm(n * p), nrow = n, ncol = p)
        x <- val* x %*% A
        SP <-  x%*%beta    #SP_i  ~ N(0,a^2)
        lambdai <- exp(SP)
       	w <- rexp(n, rate = lambdai)
        y <- w^(1/gam)
      	cen <- rexp(n, rate = clam)
	time <- pmin(y, cen)
	status <- as.numeric(cen >= y)
        tdata <- as.data.frame(cbind(x,time,status))
        out <- coxph(Surv(time, status) ~., data=tdata)
        bhat<-out$coef
        betas <- matrix(0,nrow=B,ncol=p)
        for(i in 1:B){
           samp <- sample(1:n, replace=TRUE)
           tdat <- tdata[samp,]
           temp <- coxph(Surv(time, status) ~., data=tdat)
           betas[i,] <- temp$coef
         }
 for (j in 1:p){
       tem <- shorth3(betas[,j],alpha=alpha)
       if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
           cicov[j] <- cicov[j] + 1
           avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1]
   } 
#test whether the last p-k values of beta are 0
                gg <- p - k 
                tstat <- bhat[(k+1):p]
           tem <- confreg(betas[,(k+1):p],g=gg,that=tstat,alpha=alpha)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicov[pp1] <- cicov[pp1] + 1
                avelen[pp1] <- avelen[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicov[pp2] <- cicov[pp2] + 1
                avelen[pp2] <- avelen[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicov[pp3] <- cicov[pp3] + 1
                avelen[pp3] <- avelen[pp3] + tem$brlim  
#test whether the first k values of beta are (1,...,1)
                gg <- k 
                tstat <- bhat[1:k] 
           tem <- confreg(betas[,1:k],g=gg,that=tstat,alpha=alpha)
                D0 <- sqrt(mahalanobis(one, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicov[pp4] <- cicov[pp4] + 1
                avelen[pp4] <- avelen[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(one, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicov[pp5] <- cicov[pp5] + 1
                avelen[pp5] <- avelen[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicov[pp6] <- cicov[pp6] + 1
                avelen[pp6] <- avelen[pp6] + tem$brlim   
}
cicov <- cicov/nruns
avelen <- avelen/nruns
list(cicov=cicov,avelen=avelen,beta=beta,k=k)
}



phdata<-function(n = 100, q = 3, gam = 1, clam = 0.1){
# Generates data for Cox proportional hazards regression.
# Follows Zhou (2001)
# Get Weibull regression data with g(Y) = Y^(1/k) and
# h_(g(Y))(t) = k t^(k-1) exp(SP).
# Workstation: need to activate a graphics
# device with command "X11()" or "motif()."
#
	beta <- 1 + 0 * 1:q
	x <- matrix(rnorm(n * q), nrow = n, ncol = q)/2
	SP <- x %*% beta
	lambdai <- exp(SP)
	y <- rexp(n, rate = lambdai)
	gy <- y^(1/gam)
	cen <- rexp(n, rate = clam)
	cengy <- pmin(gy, cen)
	status <- as.numeric(cen >= gy)	##change formula for coxph with q
#out <- coxph(Surv(cengy, status) ~ x[, 1] + x[, 2] + x[, 3])
#ESP <- x %*% out$coef
	list(x = x, time = cengy, status = status)
}


phdata2<-function(n=100,p=4,k=1,psi=0.0,a=1,gam=1,clam=0.1){
#Use 1 <= k <= p, where k is the number of nonnoise variables.
#there are p coefficients for beta in the Weibull regression data 
#need p > 1, beta_A = -(1/gam, ..., 1/gam, 0, ..., 0)^T with p-k zeroes
# beta_P = (1,...,1,0,...,0)^T with k ones and p-k zeroes
# Multiply x by A: for MVN data this results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T. 
# cor(X_i,X_j) = [2 psi +(p-2)psi^2]/[1 + (p-1)psi^2], i not = j
# when the correlation exists. SP~N(0,a^2), and a near 1 is ok. 
#set.seed(974)  ##need p>2
rho <- (2*psi + (p-2)*psi^2)/(1 + (p-1)*psi^2)
val <- a/sqrt(k*(1 + (p-1)*psi^2) + k*(k-1)*(2*psi + (p-2)*psi^2))
A <- matrix(psi,nrow=p,ncol=p)
diag(A) <- 1
b <- 0 * 1:p
b[1:k] <- 1 #b[1:0] acts like b[1:1] = b[1]
		x <- matrix(rnorm(n * p), nrow = n, ncol = p)
                x <- val* x %*% A
                SP <-  x%*%b    #SP_i  ~ N(0,a^2)
        lambdai <- exp(SP)
       	w <- rexp(n, rate = lambdai)
        y <- w^(1/gam)
      	cen <- rexp(n, rate = clam)
	ceny <- pmin(y, cen)
	status <- as.numeric(cen >= y)
        tdata <- as.data.frame(cbind(x,y,status))
#out <- coxph(Surv(y, status) ~ ., data=tdata)
#ESP <- x %*% out$coef
	list(betaP=b, x = x, time = ceny, status = status)
}


phgfit2<-function(x, time, status, slices = 9){
# Checks goodness of fit of Cox PH model, time is a vector of times. 
# KM curves should have the same shape if PH model is valid.
# status is 0 for censored times and 1 otherwize.
# In R type library(survival)
#  tem <- data.frame(time,status,x)
tem <- data.frame(cbind(time,status,x))
out <- coxph(Surv(time, status) ~ ., tem)
ESP <- x %*% out$coef
indx <- sort.list(ESP)
n <- length(time)
val <- as.integer(n/slices)
if(slices <= 4) par(mfrow = c(2, 2))
if( (slices == 5) || (slices ==  6)) par(mfrow = c(2, 3))
if(slices >= 7) par(mfrow = c(3, 3))
    for(i in 1:slices){
        lo <- (i-1)*val + 1
        hi <- i*val
        if(i == slices) hi <- n
	tt <- time[indx[lo:hi]]
	ts <- status[indx[lo:hi]]
	zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
	plot(zksi)
    }
par(mfrow=c(1,1))
}


phsim<-function(n = 100, q = 3, gam = 1, nruns = 1){
# Simulates censored response plots for Cox PH data.
# Calls phdata and seyp.
	for(i in 1:nruns) {
		out <- phdata(n, q, gam)
		seyp(out$x, out$time, out$status)
	}
}


phsim2 <- function(n = 100, p = 3, gam = 1, slices = 9, nruns = 1){
# Simulates KM curves in slices. 
# KM curves should have the same shape if PH model is valid.
# Calls phdata and phgfit2.
for(i in 1:nruns) {
   out <- phdata(n, p, gam)
   phgfit2(out$x, out$time, out$status, slices)
	}
}


phsim5 <- function(n = 99,  gam = 1, slices = 9){
# Simulates sliced survival plots for Cox PH regression with 
# pointwise CI bands. Follows Zhou (2001) to
# get Weibull regression data with g(Y) = Y^(1/k) and
# h_(g(Y))(t) = k t^(k-1) exp(SP).
# In R, type library(survival)
      p <- 3 ## change formula with p
      beta <- 1 + 0 * 1:p
	x <- matrix(rnorm(n * p), nrow = n, ncol = p)/2
	SP <- x %*% beta
	lambdai <- exp(SP)
	y <- rexp(n, rate = lambdai)
	gy <- y^(1/gam)
	cen <- rexp(n, rate = 0.1)
	time <- pmin(gy, cen)
	status <- as.numeric(cen >= gy)	
	##change formula for coxph with p
x1<- x[,1]; x2 <- x[,2]; x3<-x[,3]
xdata<-data.frame(cbind(time,status,x1,x2,x3))
      out <- coxph(Surv(time, status) ~ x1 + x2 + x3,data=xdata)
      ESP <- x %*% out$coef
indx <- sort.list(ESP)
val <- as.integer(n/slices)
if(slices <= 4) par(mfrow = c(2, 2))
if( (slices == 5) || (slices ==  6)) par(mfrow = c(2, 3))
if(slices >= 7) par(mfrow = c(3, 3))
for(i in 1:slices){
    lo <- (i-1)*val + 1
    hi <- i*val
    if(i == slices) hi <- n
    tt <- time[indx[lo:hi]]
    ts <- status[indx[lo:hi]]
    zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
    ## change model formula for nv and plot with p 
    midpt <- lo + floor((hi-lo)/2)
    nvt <- c(x[indx[midpt],1],x[indx[midpt],2],x[indx[midpt],3])
nv<-data.frame(time=1,status=1,x1=nvt[1],x2=nvt[3],x3=nvt[3])
plot(survfit(coxph(Surv(time, status) ~ x1 + x2 + x3), newdata = nv))
    lines(zksi,type="p")
}
par(mfrow=c(1,1))
list(coef = out$coef)
}


PHsplitsim<-function(n = 100, p = 4, k=1, n1=30, J=5, nruns = 100,  
psi = 0.0,  a=1, gam= 1, B=1000, clam=0.1, alpha = 0.05){
#needs library(glmnet), library(survival), n1>=30 gets rid of the warnings
#J is an integer between 0 and 5
#noundfit is the number of times in nruns that lasso did not underfit
#nd is the size of the training data set selected by sequential data splitting
#ad is the number of predictors, including a constant, selected by lasso for 
#the selected training data set: want ad not much larger than k
#n1 is the initial size of the training data set, want n1 <= n/2
#Simulates sequential data splitting for Cox proportional hazards regression 
#using lasso #with 10-fold cross validation. 
#Use 1 <= k < p so zeroes are in the model, k= the number of nonnoise variables
#there are p coefficients for beta in the Weibull regression data 
#need p > 1, beta_A = -(1/gam, ..., 1/gam, 0, ..., 0)^T with p-k zeroes
# beta_P = (1,...,1,0,...,0)^T with k ones and p-k zeroes
# Multiply x by A: for MVN data this results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T. 
# cor(X_i,X_j) = [2 psi +(p-2)psi^2]/[1 + (p-1)psi^2], i not = j
# when the correlation exists. SP~N(0,a^2), and a near 1 is ok. 
rho <- (2*psi + (p-2)*psi^2)/(1 + (p-1)*psi^2)
val <- a/sqrt(k*(1 + (p-1)*psi^2) + k*(k-1)*(2*psi + (p-2)*psi^2))
A <- matrix(psi,nrow=p,ncol=p)
diag(A) <- 1
beta <- 0 * 1:p
beta[1:k] <- 1 #beta[1:0] acts like beta[1:1] = beta[1]
vars <- 1:p
zero <- 0 * 1:p
one <- as.vector(0*1:k + 1)
RLpilen <- 1:nruns
RLpicov <- 0
splitpilen <- 1:nruns
splitpicov <- 0
noundfit <- 0
indx <- 1:n 
nds<-1:nruns
ads<-nds
        #check for bad values of n1
fhalf <- floor((n-J)/2)
if(n<40) n1 <- max(1,fhalf)
   else
   if(floor(n/(2*n1))>1000) n1 <- floor(n/2000)
if(n1 > fhalf) n1 <- max(1,fhalf)
	for(i in 1:nruns) {
                x <- matrix(rnorm(n * p), nrow = n, ncol = p)
                x <- val* x %*% A
                xf <- val* rnorm(p) %*% A
                SP <-  x%*%beta    #SP_i  ~ N(0,a^2)
                SPf <- xf%*%beta
                lambdai <- exp(SP)
                lambdaf <- exp(SPf)
         	w <- rexp(n, rate = lambdai)
                y <- w^(1/gam)
                wf <- rexp(1, rate = lambdaf)
                yf <- wf^(1/gam)  #uncensored
         	cen <- rexp(n, rate = clam)
	        timed <- pmin(y, cen)
	        statusd <- as.numeric(cen >= y)		
	        #make a Weibull PH data set                            
#get lasso variable selection PI, #if n >= 5*pp
      time <- timed
      status <- statusd
      ycen <- cbind(time,status)                
      out<-cv.glmnet(x,ycen,family="cox")
      lam <- out$lambda.min 
      lcoef <- as.vector(predict(out,type="coefficients",s=lam))               
      vin <- vars[lcoef!=0]
      pp <- length(vin)
      if(n >= 5*pp ){
         tdata <- as.data.frame(cbind(x[,vin],time,status))
         outw <- survreg(Surv(time, status) ~ ., data = tdata)
                int <- outw$coef[1]
                bhat <- outw$coef[-1]
                sig <- outw$scale  
                ghat=1/sig
           espw <- -xf[vin]%*%bhat/sig
           lamxf <- exp(-int/sig)*exp(espw)
           sc <- 1/lamxf^(1/ghat)
           ydat <- rweibull(B,shape=ghat,scale=sc)
           tem <- mshpi(yf=yf,ydat=ydat,n,d=pp,alph=alpha)
           RLpilen[i] <- tem$up - tem$low
           RLpicov <- RLpicov + tem$inr 
                    }        
#use sequential data splitting
                perm <- sample(indx,n)
                H <- perm[1:n1]
                xH <- x[H,]
                yH <- y[H]
                time <- timed[H]
                status <- statusd[H]
                ycenH <- cbind(time,status)
                nd<-n1 #the names need to be "time" and "status"
                out<-cv.glmnet(xH,ycenH,family="cox")
                lam <- out$lambda.min                
 lcoef <- as.vector(predict(out,type="coefficients",s=lam))               
                vin <- vars[lcoef!=0]
                pp <- length(vin)
           while(nd < fhalf && nd < 10*pp){
                if((nd+n1) <= fhalf){
                perm<-perm[-(1:n1)]
                H <- c(H,perm[1:n1])
                xH <- x[H,]
                yH <- y[H] 
                time <- timed[H]
                status <- statusd[H]
                ycenH <- cbind(time,status)               
                out<-cv.glmnet(xH,ycenH,family="cox")
                lam <- out$lambda.min 
               # fit <- predict(out,s=lam,newx=xH)
   lcoef <- as.vector(predict(out,type="coefficients",s=lam))
                vin <- vars[lcoef!=0]
                pp <- length(vin)
                nd<-nd+n1
                }
                else
                 nd<-nd+n1
            }
            if(nd > fhalf) nd <- nd-n1
            if(nd < fhalf && nd < 10*pp){
               md <- fhalf - nd
               nd <- nd + md
               perm<-perm[-(1:n1)]
               H <- c(H,perm[1:md])
               xH <- x[H,]
               yH <- y[H]  
               time <- timed[H]
               status <- statusd[H]
               ycenH <- cbind(time,status)              
               out<-cv.glmnet(xH,ycenH,family="cox")
               lam <- out$lambda.min 
               #fit <- predict(out,s=lam,newx=xH)
  lcoef <- as.vector(predict(out,type="coefficients",s=lam))
               vin <- vars[lcoef!=0]
               pp <- length(vin)
            }          
            nds[i] <- nd
            ads[i] <- pp
            if(length(vin) >= k){
                  if(vin[k]==k) 
                    noundfit <- noundfit + 1}
            xV <- x[-H,]          
#get lasso variable selection data splitting PI  
      time <- timed[-H]
      status <- statusd[-H]
      ycen <- cbind(time,status)                
      out<-cv.glmnet(xV,ycen,family="cox")
      lam <- out$lambda.min 
      lcoef <- as.vector(predict(out,type="coefficients",s=lam))               
      vin <- vars[lcoef!=0]
      pp <- length(vin)
      nV <- n - nd
      if(nV >= 5*pp ){
         tdata <- as.data.frame(cbind(xV[,vin],time,status))
         outw <- survreg(Surv(time, status) ~ ., data = tdata)
                int <- outw$coef[1]
                bhat <- outw$coef[-1]
                sig <- outw$scale  
                ghat=1/sig
           espw <- -xf[vin]%*%bhat/sig
           lamxf <- exp(-int/sig)*exp(espw)
           sc <- 1/lamxf^(1/ghat)
           ydat <- rweibull(B,shape=ghat,scale=sc)
           tem <- mshpi(yf=yf,ydat=ydat,n,d=pp,alph=alpha)
           splitpilen[i] <- tem$up - tem$low
           splitpicov <- RLpicov + tem$inr          
      }      
}
RLpicov <- RLpicov/nruns
RLpilen <- mean(RLpilen)
splitpicov <- splitpicov/nruns
splitpilen <- mean(splitpilen)
mnnd<-mean(nds)
mnad<-mean(ads)
list(mnnd=mnnd,mnad=mnad,LVSpicov=RLpicov,LVSpilen=RLpilen,
splitpicov=splitpicov,splitpilen=splitpilen,
beta=beta,k=k,noundfit=noundfit,n1=n1)}


predsim<-function(n = 100, p = 4, nruns = 100, xtype = 1, dd = 1, eps = 0.25, alpha = 0.1)
{# MAY NOT WORK IF p = 1. 
# Gets coverages of nonparametric, semiparametric and 
# parametric MVN prediction regions.
# Multiply x by A where xtype = 1 for MVN Nq(0,I), 
# 2, 3, 4 and 10 (with delta = eps) for delta Np(0,I) + (1-delta) Np(0, 25 I)
# 5 for lognormal, 
# 6, 7, 8 and 9  for multivariate t_d with d = 3, 5, 19 or dd 
# mahalanobis gives squared Maha distances 
# set.seed(974)
	ccvr <- 0
	scvr <- 0
	rcvr <- 0
	volc <- 1:nruns
	vols <- volc
	volr <- volc	#up <- 1 - alpha 
	up <- min((1 - alpha/2), (1 - alpha + 10*alpha*p/n))
	if(alpha > 0.1)
	  up <- min((1 - alpha + 0.05), (1 - alpha + p/n))
        qn <- up
        if(qn < 1 - alpha + 0.001)
          up <- 1 - alpha
	A <- sqrt(diag(1:p))
	m <- n + 1
	for(i in 1:nruns) {
#make data
		x <- matrix(rnorm(m * p), nrow = m, ncol = p)
		if(xtype == 2) {
			zu <- runif(m)
			x[zu < 0.4,  ] <- x[zu < 0.4,  ] * 5
		}
		if(xtype == 3) {
			zu <- runif(m)
			x[zu < 0.6,  ] <- x[zu < 0.6,  ] * 5
		}
		if(xtype == 4) {
			zu <- runif(m)
			x[zu < 0.1,  ] <- x[zu < 0.1,  ] * 5
		}
		if(xtype == 5)
			x <- exp(x)
		if(xtype == 6) {
			zu <- sqrt(rchisq(m, 3)/3)
			x <- x/zu
		}
		if(xtype == 7) {
			zu <- sqrt(rchisq(m, 5)/5)
			x <- x/zu
		}
		if(xtype == 8) {
			zu <- sqrt(rchisq(m, 19)/19)
			x <- x/zu
		}
		if(xtype == 9) {
			zu <- sqrt(rchisq(m, dd)/dd)
			x <- x/zu
		}
		if(xtype == 10) {
			zu <- runif(m)
			x[zu < eps,  ] <- x[zu < eps,  ] * 5
		}
		x <- x %*% A
		xx <- x[ - m,  ]
		center <- apply(xx, 2, mean)
		cov <- var(xx)
		md2 <- mahalanobis(xx, center, cov)
		hsq <- quantile(md2, up)
		if(mahalanobis(t(x[m,  ]), center, cov) <= hsq)
			ccvr <- ccvr + 1
		volc[i] <- sqrt(hsq)^p * prod(diag(chol(cov)))
		out <- covrmvn(xx)
		center <- out$center
		cov <- out$cov
		md2 <- mahalanobis(xx, center, cov)
		hsq <- quantile(md2, up)
		dsq <- mahalanobis(t(x[m,  ]), center, cov)
		if(dsq <= hsq)
			scvr <- scvr + 1
		sqrtdet <- prod(diag(chol(cov)))
		vols[i] <- sqrt(hsq)^p * sqrtdet
		hsq <- qchisq(up, p)
		if(dsq <= hsq)
			rcvr <- rcvr + 1
		volr[i] <- sqrt(hsq)^p * sqrtdet
	}
	ccvr <- ccvr/nruns
	scvr <- scvr/nruns
	rcvr <- rcvr/nruns	#get a measure of efficiency wrt vols, so eff(vols) = 1
	vols <- mean(vols)
	volc <- mean(volc)/vols
	volr <- mean(volr)/vols
	vols <- 1
	list(ncvr = ccvr, scvr = scvr, mcvr = rcvr, voln = volc, vols = vols, volm = volr, up = up)
}



RLPHboot<-function(x,time,status,B=1000){
#needs library(glmnet), library(survival), n > 5p, p > 2, want B >= 50p, 
#bootstraps the Cox regression relaxed lasso, takes a few minutes
x <- as.matrix(x)
n <- length(time)
p <-  dim(x)[2]
vars <- 1:p
y<-cbind(time,status)
outlasso<-cv.glmnet(x,y,family="cox")
lam <- outlasso$lambda.min
lcoef <- as.vector(predict(outlasso,type="coefficients",s=lam)) 
vin <- vars[lcoef!=0]
tx <- x[,vin]
tdat <- as.data.frame(cbind(tx,time,status))
sub <-  coxph(Surv(time, status) ~., data=tdat)
bhatimin0 <- 0 * 1:p
bhatimin0[vin] <- sub$coef
betas <- matrix(0,nrow=B,ncol=p)
for(i in 1:B){
    samp <- sample(1:n, replace=TRUE)
    tx <- x[samp,]
    ty <- y[samp,]
    ttime <- time[samp]
    tstatus <- status[samp]
    temp<-cv.glmnet(tx,ty,family="cox")
    lam <- temp$lambda.min
    lcoef <- as.vector(predict(temp,type="coefficients",s=lam))
    vin <- vars[lcoef!=0]
    tx <- tx[,vin]    
    tdat <- as.data.frame(cbind(tx,ttime,tstatus))
    sub <-  coxph(Surv(ttime, tstatus) ~., data=tdat)
    betas[i,vin] <- sub$coef
   }
list(bhatimin0=bhatimin0,betas=betas)
}


RLPHboot2<-function(x,time,status,c=0.01,aug=F,B=1000){
#needs library(glmnet), library(survival), n > 5p, p > 2, want B >= 50p, 
#bootstraps the Cox regression relaxed lasso, 
#using bhatVS and bhatMIX,   takes a few minutes
#If augm neq F, adds cB full model bootstrap samples so S*_T is better
#conditioned.
x <- as.matrix(x)
n <- length(time)
p <-  dim(x)[2]
vars <- 1:p
d <- ceiling(c*B)
bpd <- B + d
bp1 <- B + 1
y<-cbind(time,status)
outlasso<-cv.glmnet(x,y,family="cox")
lam <- outlasso$lambda.min
lcoef <- as.vector(predict(outlasso,type="coefficients",s=lam)) 
vin <- vars[lcoef!=0]
tx <- x[,vin]
tdat <- as.data.frame(cbind(tx,time,status))
sub <-  coxph(Surv(time, status) ~., data=tdat)
bhatimin0 <- 0 * 1:p
bhatimin0[vin] <- sub$coef
betas <- matrix(0,nrow=B,ncol=p)
btmix <- betas
for(i in 1:B){
    samp <- sample(1:n, replace=TRUE)
    tx <- x[samp,]
    ty <- y[samp,]
    ttime <- time[samp]
    tstatus <- status[samp]
    temp<-cv.glmnet(tx,ty,family="cox")
    lam <- temp$lambda.min
    lcoef <- as.vector(predict(temp,type="coefficients",s=lam))
    vin <- vars[lcoef!=0]
    tx <- tx[,vin]    
    tdat <- as.data.frame(cbind(tx,ttime,tstatus))
    sub <-  coxph(Surv(ttime, tstatus) ~., data=tdat)
    betas[i,vin] <- sub$coef
# get betahatMIX*
    samp <- sample(1:n, replace=TRUE)
    tx <- x[samp,]
    ty <- y[samp,]
    ttime <- time[samp]
    tstatus <- status[samp]
    tx <- tx[,vin]    
    tdat <- as.data.frame(cbind(tx,ttime,tstatus))
    sub <-  coxph(Surv(ttime, tstatus) ~., data=tdat)
    btmix[i,vin] <- sub$coef
   }
if(aug == F)
{betas <- betas[1:B,]; btmix <- btmix[1:B,]}
else {
  for(i in bp1:bpd){
    samp <- sample(1:n, replace=TRUE)
    tdat <- tdata[samp,]
    temp <- coxph(Surv(time, status) ~., data=tdat)
    betas[i,]<-temp$coef
      }
   btmix[bp1:bpd,] <- betas[bp1:bpd,]
}
list(bhatimin0=bhatimin0,betas=betas,btmix=btmix)
}



RLPHbootsim<-function(n=100,p=4,k=1,nruns=100,psi=0.0,B=1000,a=1,gam=1,
clam=0.1,alpha=0.05)
{
#needs library(glmnet), library(survival), n > 5p, p > 2, want B >= 50p, 
#bootstraps the Cox regression  lasso variable selection, takes a long time
#Use 1 <= k < p, so zeroes are in the model, k is the number of nonnoise variables.
#there are p coefficients for beta in the Weibull regression data 
#need p > 1, beta_A = -(1/gam, ..., 1/gam, 0, ..., 0)^T with p-k zeroes
# beta_P = (1,...,1,0,...,0)^T with k ones and p-k zeroes
# Multiply x by A: for MVN data this results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T. 
# cor(X_i,X_j) = [2 psi +(p-2)psi^2]/[1 + (p-1)psi^2], i not = j
# when the correlation exists. SP~N(0,a^2), and a near 1 is ok. 
rho <- (2*psi + (p-2)*psi^2)/(1 + (p-1)*psi^2)
val <- a/sqrt(k*(1 + (p-1)*psi^2) + k*(k-1)*(2*psi + (p-2)*psi^2))
A <- matrix(psi,nrow=p,ncol=p)
diag(A) <- 1
beta <- 0 * 1:p
beta[1:k] <- 1 #beta[1:0] acts like beta[1:1] = beta[1]
vars <- 1:p
pp6<-p+6; pp5<-p+5; pp4<-p+4;pp3<-p+3; pp1<-p+1; pp2<-p+2
cicov <- 0*(1:pp6)
avelen <- 0*(1:pp6)
zero <- 0 * 1:p
one <- as.vector(0*1:k + 1)
dd <- 1:nruns
for(i in 1:nruns) {
		x <- matrix(rnorm(n * p), nrow = n, ncol = p)
                x <- val* x %*% A
                SP <-  x%*%beta    #SP_i  ~ N(0,a^2)
        lambdai <- exp(SP)
       	w <- rexp(n, rate = lambdai)
        y <- w^(1/gam)
      	cen <- rexp(n, rate = clam)
	time <- pmin(y, cen)
	status <- as.numeric(cen >= y)
        tdata <- as.data.frame(cbind(x,time,status))
y<-cbind(time,status)
outlasso<-cv.glmnet(x,y,family="cox")
lam <- outlasso$lambda.min
lcoef <- as.vector(predict(outlasso,type="coefficients",s=lam)) 
vin <- vars[lcoef!=0]
tx <- x[,vin]
tdat <- as.data.frame(cbind(tx,time,status))
sub <-  coxph(Surv(time, status) ~., data=tdat)
bhat <- zero
bhat[vin] <- sub$coef
dd[i]<-length(sub$coef)
betas <- matrix(0,nrow=B,ncol=p) #nonparametric bootstrap
for(m in 1:B){
    samp <- sample(1:n, replace=TRUE)
    tx <- x[samp,]
    ty <- y[samp,]
    ttime <- time[samp]
    tstatus <- status[samp]
    temp<-cv.glmnet(tx,ty,family="cox")
    lam <- temp$lambda.min
    lcoef <- as.vector(predict(temp,type="coefficients",s=lam))
    vin <- vars[lcoef!=0]
    tx <- tx[,vin]    
    tdat <- as.data.frame(cbind(tx,ttime,tstatus))
    sub <-  coxph(Surv(ttime, tstatus) ~., data=tdat)
    betas[m,vin] <- sub$coef
   }
 for (j in 1:p){
       tem <- shorth3(betas[,j],alpha=alpha)
       if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
           cicov[j] <- cicov[j] + 1
           avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1]
   }  
#test whether the last p-k values of beta are 0
                gg <- p - k 
                tstat <- bhat[(k+1):p]
           tem <- confreg(betas[,(k+1):p],g=gg,that=tstat,alpha=alpha)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicov[pp1] <- cicov[pp1] + 1
                avelen[pp1] <- avelen[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicov[pp2] <- cicov[pp2] + 1
                avelen[pp2] <- avelen[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicov[pp3] <- cicov[pp3] + 1
                avelen[pp3] <- avelen[pp3] + tem$brlim  
#test whether the first k values of beta are (1,...,1)
                gg <- k 
                tstat <- bhat[1:k] 
           tem <- confreg(betas[,1:k],g=gg,that=tstat,alpha=alpha)
                D0 <- sqrt(mahalanobis(one, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicov[pp4] <- cicov[pp4] + 1
                avelen[pp4] <- avelen[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(one, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicov[pp5] <- cicov[pp5] + 1
                avelen[pp5] <- avelen[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicov[pp6] <- cicov[pp6] + 1
                avelen[pp6] <- avelen[pp6] + tem$brlim  
}
cicov <- cicov/nruns
avelen <- avelen/nruns
mndd <- mean(dd)
list(mndd=mndd,cicov=cicov,avelen=avelen,beta=beta,k=k)
}



RLPHbootsim2<-function(n=100,p=4,k=2,nruns=100,psi=0.0,BB=1000,a=1,gam=1,
clam=0.1,cc=0.01,augm=F,alph=0.05){

#needs library(glmnet), library(survival), n > 5p, p > 2, want B >= 50p, 
##Convergence problems is psi much larger than 1/sqrt(p).
#Bootstraps the Cox regression lasso variable selection and bhatMIX. 
# takes a long time
#Use 1 <= k < p so zeroes are in the model, k is the number of nonnoise variables.
#there are p coefficients for beta in the Weibull regression data 
#need p > 1, beta_A = -(1/gam, ..., 1/gam, 0, ..., 0)^T with p-k zeroes
# beta_P = (1,...,1,0,...,0)^T with k ones and p-k zeroes
# Multiply x by A: for MVN data this results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T. 
# cor(X_i,X_j) = [2 psi +(p-2)psi^2]/[1 + (p-1)psi^2], i not = j
# when the correlation exists. SP~N(0,a^2), and a near 1 is ok. 
rho <- (2*psi + (p-2)*psi^2)/(1 + (p-1)*psi^2)
val <- a/sqrt(k*(1 + (p-1)*psi^2) + k*(k-1)*(2*psi + (p-2)*psi^2))
A <- matrix(psi,nrow=p,ncol=p)
diag(A) <- 1
beta <- 0 * 1:p
beta[1:k] <- 1 #beta[1:0] acts like beta[1:1] = beta[1]
vars <- 1:p
pp6<-p+6; pp5<-p+5; pp4<-p+4;pp3<-p+3; pp1<-p+1; pp2<-p+2
cicov <- 0*(1:pp6)
avelen <- 0*(1:pp6)
cicovmix <- cicov
avelenmix <- avelen
zero <- 0 * 1:p
one <- as.vector(0*1:k + 1)
dd <- 1:nruns
for(i in 1:nruns) {
		x <- matrix(rnorm(n * p), nrow = n, ncol = p)
                x <- val* x %*% A
                SP <-  x%*%beta    #SP_i  ~ N(0,a^2)
        lambdai <- exp(SP)
       	w <- rexp(n, rate = lambdai)
        y <- w^(1/gam)
      	cen <- rexp(n, rate = clam)
	time <- pmin(y, cen)
	status <- as.numeric(cen >= y)
#make a Weibull PH model 
        out <-RLPHboot2(x,time,status,c=cc,aug=augm,B=BB)
#bootstrap the lasso variable selection model
                for (j in 1:p){
                   tem <- shorth3(out$betas[,j],alpha=alph)
                   if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
                    cicov[j] <- cicov[j] + 1
                    avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1]
                   tem <- shorth3(out$btmix[,j],alpha=alph)
                   if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
                    cicovmix[j] <- cicovmix[j] + 1
                    avelenmix[j] <- avelenmix[j] + tem$shorth[2] - tem$shorth[1]
                    } 
                #test whether the last p-k values of beta are 0
                gg <- p - k 
                tstat <- out$bhatimin0[(k+1):p]
           tem <- confreg(out$betas[,(k+1):p],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicov[pp1] <- cicov[pp1] + 1
                avelen[pp1] <- avelen[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicov[pp2] <- cicov[pp2] + 1
                avelen[pp2] <- avelen[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicov[pp3] <- cicov[pp3] + 1
                avelen[pp3] <- avelen[pp3] + tem$brlim
           tem <- confreg(out$btmix[,(k+1):p],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicovmix[pp1] <- cicovmix[pp1] + 1
                avelenmix[pp1] <- avelenmix[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicovmix[pp2] <- cicovmix[pp2] + 1
                avelenmix[pp2] <- avelenmix[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicovmix[pp3] <- cicovmix[pp3] + 1
                avelenmix[pp3] <- avelenmix[pp3] + tem$brlim
#test whether the first k values of beta are (1,...,1)
                gg <- k 
                tstat <- out$bhatimin0[1:k] 
           tem <- confreg(out$betas[,1:k],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(one, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicov[pp4] <- cicov[pp4] + 1
                avelen[pp4] <- avelen[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(one, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicov[pp5] <- cicov[pp5] + 1
                avelen[pp5] <- avelen[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicov[pp6] <- cicov[pp6] + 1
                avelen[pp6] <-  avelen[pp6] + tem$brlim 
          tem <- confreg(out$btmix[,1:k],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(one, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicovmix[pp4] <- cicovmix[pp4] + 1
                avelenmix[pp4] <- avelenmix[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(one, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicovmix[pp5] <- cicovmix[pp5] + 1
                avelenmix[pp5] <- avelenmix[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicovmix[pp6] <- cicovmix[pp6] + 1
                avelenmix[pp6] <-  avelenmix[pp6] + tem$brlim 
           }
cicov <- cicov/nruns
avelen <- avelen/nruns
cicovmix <- cicovmix/nruns
avelenmix <- avelenmix/nruns
list(cicov=cicov,avelen=avelen,cicovmix=cicovmix,avelenmix=avelenmix,
beta=beta,k=k)
}


seyp<-function(x, time, status){
# Makes the censored response plot for Cox proportional hazards regression.
        xdata<-data.frame(cbind(time,status,x))
	out <- coxph(Surv(time, status) ~ ., data=xdata)
	ESP <- x %*% out$coef
	plot(ESP, time, type = "n")
	points(ESP[status == 1], time[status == 1], pch = 3)
	points(ESP[status == 0], time[status == 0], pch = 1)
}


shorth3<-function(y, alpha = 0.05){
# computes the shorth(c) interval [Ln,Un] for c = cc.
#shorth lists Ln and Un using Frey's correction
n <- length(y)
cc <- ceiling(n * (1 - alpha + 1.12*sqrt(alpha/n)))
cc <- min(n,cc)
  sy <- sort(y)
  rup <- sy[cc]
  rlow <- sy[1]
  olen <- rup - rlow
  if(cc < n){
    for(j in (cc + 1):n){
	zlen <- sy[j] - sy[j - cc + 1]
	if(zlen < olen) {
	   olen <- zlen
	   rup <- sy[j]
	   rlow <- sy[j - cc + 1]
	}
    }
} 
Ln <- rlow; Un <- rup
list(shorth=c(Ln, Un))
}


swhat <- function(stime,bhat,nv,ghat,lhat){
# Finds estimated survival function for Weibull regression
# evaluated at x = nv.
r <- length(stime)
shat <- 1:r
for(i in 1:r){
shat[i] <- exp(-exp(-ghat*bhat%*%nv)*lhat*stime[i]^ghat)
}
list(shat=shat)
}



vlung2 <- function(sexv=2){
# uses 4 slices instead of the 9 used by vlung
# Check goodness of fit of Cox PH for R lung data.
# for females use sexv = 2
# for males use sexv = 1, change the 2 to a 1
flung<-lung[lung$sex==sexv,]
nflung<-na.omit(flung[,c("time","status","ph.ecog","ph.karno",
"pat.karno","wt.loss")])
out <- coxph(Surv(time, status) 
~ ph.ecog + ph.karno + pat.karno + wt.loss,data = nflung)
x <- as.matrix(nflung[,c(3,4,5,6)])
time <- as.vector(nflung[,1])
status <- as.vector(nflung[,2])
ESP <- x %*% out$coef
indx <- sort.list(ESP)
n <- 85
val <- as.integer(n/4)
cmar<-par("mar")
par(mfrow = c(2, 2))
#par(mar=c(4.0,4.0,2.0,0.5))
for(i in 1:4){
    lo <- (i-1)*val + 1
    hi <- i*val
    if(i == 4) hi <- n
    tt <- time[indx[lo:hi]]
    ts <- status[indx[lo:hi]]
    zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
    ## change model formula for nv with p 
    midpt <- lo + floor((hi-lo)/2)
nvt <- c(x[indx[midpt],1],x[indx[midpt],2],x[indx[midpt],3],x[indx[midpt],4])
#take a typical value of time
nv<-data.frame(time=300,status=1,ph.ecog=nvt[1],ph.karno=nvt[2],
pat.karno=nvt[3],wt.loss=nvt[4])
plot(survfit(coxph(Surv(time, status) 
~ ph.ecog + ph.karno + pat.karno + wt.loss,data = nflung), newdata = nv),
xlab="Time",ylab="Estimated S(t)")
lines(zksi,type="p")    
}
par(mfrow=c(1,1))
par(mar=cmar)}



vnwtco <- function(){
#visualizing NWTCO data
##type library(survival)
#>nwtco[1,]
#  seqno instit histol stage study rel edrel age in.subcohort
#     1      2      2     1     3   0  6075  25        FALSE
z<-coxph(Surv(edrel,rel)~histol+instit+age+stage,data=nwtco)
summary(z)
ccex<-par("cex")
par(mfrow=c(2,2))
par(cex=0.3)
zz<- cox.zph(z)
# adding cex=0.1 has no effect
plot(zz, cex=0.1)
print(zz)
par(cex=ccex)
#test seems to contradict plots which have fairly horizontal loess
time <- as.vector(nwtco[,7])
status <- as.vector(nwtco[,6])
out <- coxph(Surv(time, status)~histol+instit+age+stage,data=nwtco) 
x <- as.matrix(nwtco[,c(3,2,8,4)])
ESP <- x %*% out$coef
indx <- sort.list(ESP)
n <- 4028
##change par(mfrow=c(2,3)) with slices
slices<-6
val <- as.integer(n/slices)
cmar<-par("mar")
ccex<-par("cex")
par(mfrow = c(2, 3))
par(mar=c(4.0,4.0,2.0,0.5))
for(i in 1:slices){
    lo <- (i-1)*val + 1
    hi <- i*val
    if(i == slices) hi <- n
    tt <- time[indx[lo:hi]]
    ts <- status[indx[lo:hi]]
    zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
    ## change model formula for nv with p 
    midpt <- lo + floor((hi-lo)/2)
    nvt <- c(x[indx[midpt],1],x[indx[midpt],2],x[indx[midpt],3],
    x[indx[midpt],4])
#use typical values for seqno,study,time, and in.subcohort
nv<-data.frame(seqno=4089,instit=nvt[2],histol=nvt[1],stage=nvt[4],study=3, status=1,time=300,age=nvt[3],in.subcohort=TRUE)
plot(survfit(coxph(Surv(time, status)~ histol+instit+age+stage,data = nwtco), newdata = nv),
xlab="Time",ylab="Estimated S(t)",cex=0.1)
    lines(zksi,cex=0.1,type="p")
    }
par(mfrow=c(1,1))
par(mar=cmar)
par(cex=ccex)
}



vovar <- function(){
####bug in new version of R for slice survival plot
# uses 4 slices, calls swhat
# check goodness of fit of Cox PH and Weibull PH
# models for the ovarian cancer data

time <- c(156, 1040, 59, 421, 329, 769, 365, 770, 1227, 
268, 475, 1129, 464, 1206, 638, 563, 1106, 431, 855, 
803, 115, 744, 477, 448, 353, 377)

status <- c(1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1,
1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0)

treat <- c(1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0)

age <- c(66, 38, 72, 53, 43, 59, 64, 57, 59, 74, 59, 53,
56, 44, 56, 55, 44, 50, 43, 39, 74, 50, 64, 56, 63, 58)

ovar <- cbind(time,status,treat,age)


#paste these commands for the LCR plot
z <- survreg(Surv(ovar[,1],ovar[,2])~ovar[,3]+ovar[,4],dist="weibull")
AFTESP <- 11.548 -.561*ovar[,3] -.079*ovar[,4]
logT <- log(ovar[,1])
plot(AFTESP,logT,pch = ' ')
points(AFTESP[ovar[,2]==0],log(ovar[ovar[,2]==0,1]),pch=1)
points(AFTESP[ovar[,2]==1],log(ovar[ovar[,2]==1,1]),pch=3)
abline(0,1) 

#paste these commands for the Weibull EE plot
z <- survreg(Surv(ovar[,1],ovar[,2])~ovar[,3]+ovar[,4],dist="weibull")
zx <- cbind(ovar[,3],ovar[,4])
zc <- coxph(Surv(ovar[,1],ovar[,2])~ovar[,3]+ovar[,4])
sighat<-z$scale
ESPC <- zx%*%zc$coef
ESPW <- -zx%*%z$coef[-1]/sighat
plot( ESPW, ESPC)
abline(0,1)
title("a) Weibull")
cor( ESPW, ESPC)

#paste these commands for the Exponential EE plot

ze <- survreg(Surv(ovar[,1],ovar[,2])~ovar[,3]+ovar[,4],dist="exponential")
ESPE <- -zx%*%ze$coef[-1]
plot( ESPE, ESPC)
abline(0,1)
title("b) Exponential")

##########bug in plot below for new version of R

#type vovar() to produce the slice survival plot
ghat <- 1/z$scale
ahat <- z$coef[1]
bhat <- z$coef[-1]
lhat <- exp(-ahat*ghat)
stime <- sort(time)
x <- zx
ESP <- ESPC
indx <- sort.list(ESP)
n <- 26
val <- as.integer(n/4)
cmar<-par("mar")
par(mfrow = c(2, 2))
#par(mar=c(4.0,4.0,2.0,0.5))
for(i in 1:4){
    lo <- (i-1)*val + 1
    hi <- i*val
    if(i == 4) hi <- n
    tt <- time[indx[lo:hi]]
    ts <- status[indx[lo:hi]]
    zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
    ## change model formula for nv with p 
    midpt <- lo + floor((hi-lo)/2)
    nv <- c(x[indx[midpt],1],x[indx[midpt],2])
plot(survfit(coxph(Surv(time, status)~ ovar[,3]+ovar[,4]),
         newdata = nv),xlab="Time",ylab="Estimated S(t)")
    sfit <- swhat(stime,bhat,nv,ghat,lhat)$shat
    points(stime,sfit,pch=3)
    lines(zksi,type="p")
    }
par(mfrow=c(1,1))
par(mar=cmar)
}


weyp<-function(x, time, status){
# Makes the log censored response plot for Weibull proportional hazards regression.
        xdata<-data.frame(cbind(time,status,x))
	out <- survreg(Surv(time, status) ~ ., data=xdata)
	ESP <- x %*% out$coef[-1] + out$coef[1]
	plot(ESP, log(time), type = "n")
	points(ESP[status == 1], log(time[status == 1]), pch = 3)
	points(ESP[status == 0], log(time[status == 0]), pch = 1)
	abline(0, 1)
	#abline(lsfit(ESP[status == 1], log(time[status == 1]))$coef)
}



wphsim <- function(n = 99,  gam = 1, slices = 9){
# Generates data for Weibull PH regression using Zhou (2001).
# Makes a slice survival plot for Weibull regression.
# The AFT ESP is used instead of the PH ESP, so survival
# increases as the AFT ESP increases.
# Uses crosses for Weibull and circles for KM.
# Get Weibull regression data with g(Y) = Y^(1/k) and
# h_(g(Y))(t) = k t^(k-1) exp(SP).
# 
# Calls function swhat.
# type library(survival)
p <- 3 ## change formula with p
beta <- 1 + 0 * 1:p
x <- matrix(rnorm(n * p), nrow = n, ncol = p)/2
SP <- x %*% beta
lambdai <- exp(SP)
y <- rexp(n, rate = lambdai)
gy <- y^(1/gam)
cen <- rexp(n, rate = 0.1)
time <- pmin(gy, cen)
status <- as.numeric(cen >= gy)	
##change formula for survreg with p
out <- survreg(Surv(time, status) ~ x[, 1] + x[, 2] + x[, 3])
ESP <- x %*% out$coef[-1]
ghat <- 1/out$scale
ahat <- out$coef[1]
bhat <- out$coef[-1]
lhat <- exp(-ahat*ghat)
stime <- sort(time)
indx <- sort.list(ESP)
val <- as.integer(n/slices)
if(slices <= 4) par(mfrow = c(2, 2))
if( (slices == 5) || (slices ==  6)) par(mfrow = c(2, 3))
if(slices >= 7) par(mfrow = c(3, 3))
for(i in 1:slices){
    lo <- (i-1)*val + 1
    hi <- i*val
    if(i == slices) hi <- n
    tt <- time[indx[lo:hi]]
    ts <- status[indx[lo:hi]]
    zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
    ## change model formula for nv with p 
    midpt <- lo + floor((hi-lo)/2)
    nv <- c(x[indx[midpt],1],x[indx[midpt],2],x[indx[midpt],3])
    sfit <- swhat(stime,bhat,nv,ghat,lhat)$shat
    plot(stime,sfit,pch=3)
    lines(zksi,type="p")
    }
par(mfrow=c(1,1))
list(coef = out$coef)
}


wpisim<-function(n = 100, p = 4, k = 1, nruns = 100,  
psi = 0.0, J = 5, B=1000,  a = 1, gam=1, clam=0.1, alpha = 0.05){
#Program can fail if n/p is small and nruns > 100.
#Use 1 <= k <= p, where k is the number of nonnoise variables.
#Simulates the  Olive et al. (2019) PIs for Weibull regression.
#PIs for full model, need library(survival)
#there are p coefficients
#need p > 1, beta = (1, ..., 1, 0, ..., 0) with k ones, p-k zeroes
# Multiply x by A: for MVN data this results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T. 
# cor(X_i,X_j) = [2 psi +(p-2)psi^2]/[1 + (p-1)psi^2], i not = j
# when the correlation exists. SP~N(0,a^2), and a near 1 is ok. 
#set.seed(974)  ##need p>2
fullpilen <- 1:nruns
fullpicov <- 0
rho <- (2*psi + (p-2)*psi^2)/(1 + (p-1)*psi^2)
val <- a/sqrt(k*(1 + (p-1)*psi^2) + k*(k-1)*(2*psi + (p-2)*psi^2))
A <- matrix(psi,nrow=p,ncol=p)
diag(A) <- 1
b <- 0 * 1:p
b[1:k] <- 1 #b[1:0] acts like b[1:1] = b[1]
	for(i in 1:nruns) {
		x <- matrix(rnorm(n * p), nrow = n, ncol = p)
                x <- val* x %*% A
                xf <- val* rnorm(p) %*% A
                SP <-  x%*%b    #SP_i  ~ N(0,a^2)
        lambdai <- exp(SP)
        lambdaf <- exp(xf%*%b)
	w <- rexp(n, rate = lambdai)
        wf <- rexp(1, rate = lambdaf)
	y <- w^(1/gam)
        yf <- wf^(1/gam)  #uncensored
	cen <- rexp(n, rate = clam)
	ceny <- pmin(y, cen)
	status <- as.numeric(cen >= y)
        statusf <- c(status,1)
                tdata <- as.data.frame(cbind(x,ceny,status))
                cenyn <- c(y,yf)
                xn <- rbind(x,xf)
                tdat <- as.data.frame(cbind(xn,cenyn,statusf))
		#make a WR data set
#get full model WR PI
                if(n >= 5*p){
           outw <- survreg(Surv(ceny, status) ~ ., data = tdata)
                int <- outw$coef[1]
                bhat <- outw$coef[-1]
                sig <- outw$scale  
                ghat=1/sig
           espw <- -xf%*%bhat/sig
           lamxf <- exp(-int/sig)*exp(espw)
           sc <- 1/lamxf^(1/ghat)
           ydat <- rweibull(B,shape=ghat,scale=sc)
           #tem <- shpi(yf=yf,ydat=ydat,alph=alpha)
           tem <- mshpi(yf=yf,ydat=ydat,n,d=p,alph=alpha)
           fullpilen[i] <- tem$up - tem$low
           fullpicov <- fullpicov + tem$inr 
           } 
  }
fullpimnlen <- mean(fullpilen)
fullpicov <- fullpicov/nruns
list(int=int,beta=b,fullpicov=fullpicov,fullpimenlen=fullpimnlen)
}


wregsim<-function(n = 100, q = 3, gam = 1, nruns = 1){
# Simulates log censored response plots for Weibull PH data.
# calls phdata and weyp
	for(i in 1:nruns) {
		out <- phdata(n, q, gam)
		weyp(out$x, out$time, out$status)
	}
}


wregsim2<-function(n = 100, p=3, gam = 1, nruns = 1){
# Simulates EE plots for Weibull proportional hazards AFT data.
# calls phdata 
# R users need to type library(survival) 
        corr <- 1:nruns
	for(i in 1:nruns) {
		out <- phdata(n, p, gam)
		x <- out$x
		time <- out$time
		status <- out$status
                dat<-data.frame(cbind(time,status,x))
		outp <- coxph(Surv(time, status) ~ ., data=dat)
		outw <- survreg(Surv(time, status) ~ ., data = dat)
                shat <- outw$scale
		ESPC <- x %*% outp$coef
		ESPW <- -(x %*% outw$coef[-1])/shat
		plot( ESPW, ESPC)
                corr[i] <- cor(ESPW, ESPC)
		abline(0, 1)
	}
        mncorr <- mean(corr)
        list(mncorr=mncorr)
}


wregsim3<-function(n = 100, p=3, gam = 1, lam = 0.01, nruns = 100){
# See if alphahat and betahat are consistent estimators.
# Calls phdata. 
# want mnint = 0, mncoef[i] = -1/gam, mnscale = 1/gam
	wout <- matrix(0, nrow = nruns, ncol = (p+2))
	for(i in 1:nruns) {
		out <- phdata(n, p, gam, lam)
		x <- out$x
		time <- out$time
		status <- out$status
                dat<-data.frame(cbind(time,status,x))
		outw <- survreg(Surv(time, status) ~ ., data = dat)
		wout[i, 1:(p+1)] <- outw$coef
		wout[i, (p+2)] <- outw$scale
	}
	wmn <- apply(wout, 2, mean)
	mnint <- wmn[1]
	mncoef <- wmn[2:(p+1)]
	mnscale <- wmn[(p+2)]
	list(mnint = mnint, mncoef = mncoef, mnscale = mnscale)
}


wregsim4<-function(n=100,p=4,k=1,psi=0,a=1,gam=1,clam=0.01, nruns = 100){
# See if alphahat and betahat are consistent estimators for the WAFT model.
# Calls phdata2. beta_A = -(1/gam,...,1/gam,0,...,0)^T with k ones
# want mnint = 0, mncoef = beta_A, mnscale = 1/gam
	wout <- matrix(0, nrow = nruns, ncol = (p+2))
	for(i in 1:nruns) {
		out <- phdata2(n, p, k, psi, a, gam, clam)
		x <- out$x
		time <- out$time
		status <- out$status
                dat<-data.frame(cbind(time,status,x))
		outw <- survreg(Surv(time, status) ~ ., data = dat)
                wout[i, 1:(p+1)] <- outw$coef
		wout[i, (p+2)] <- outw$scale                
	}
	wmn <- apply(wout, 2, mean)
	mnint <- wmn[1]
	mncoef <- wmn[2:(p+1)]
	mnscale <- wmn[(p+2)]
	list(mnint = mnint, mncoef = mncoef, mnscale = mnscale)
}


wregsim5<-function(n=100,p=4,k=1,psi=0,a=1,gam=1,clam=0.01, nruns = 100){
# See if  betahat is a consistent estimator for the WPH model.
# Calls phdata2. beta_A = -(1/gam,...,1/gam,0,...,0)^T with p-k zeroes
#beta_P = (1,...,1,0...,0)^T with k ones and p-k zeroes
# want  mncoef[i] = beta_P
	wout <- matrix(0, nrow = nruns, ncol = p)
	for(i in 1:nruns) {
		out <- phdata2(n, p, k, psi, a, gam, clam)
		x <- out$x
		time <- out$time
		status <- out$status
                dat<-data.frame(cbind(time,status,x))
		outw <- coxph(Surv(time, status) ~ ., data = dat)
                wout[i, 1:p] <- outw$coef		               
	}
	wmn <- apply(wout, 2, mean)	
	list(mncoef = wmn)
}