#------------------------------------------------------------------------# # Created on : May 21, 2015 # AUTHOR : Hyokyoung Grace Hong jointly with Lan Wang and Xuming He # AFFILIATION : Michigan State University # EMAIL : hhong@stt.msu.edu # Simulate Example 1 in Hong, Wang, and He (2015, # A data-driven approach to conditional screening of dimensional variables, manuscript) # # This program performs the conditional sure independence screening, where # conditioning set is selected by SPCA, especially fantope projection. ## Inputs: ## x: covariates ## y: response variable ## family: "gaussian", "binomial", etc. ## nsize: the number of conditioning variables that will be used. ## lambda: Fantope projection tuning parameter ## ## Outputs: ## coef: slope coefficients ## rank: rank of coef ## Cset: selected conditioning set # R: 3.1.1 #-------------------------------------------------------------------------# ## Proposed Method (3S) ## Date: May 21, 2015 ## Authors: Grace Hong, Lan Wang, and Xuming He ## R code is written by Grace Hong (hhong@stt.msu.edu) ## ## Inputs: ## x: covariates ## y: response variable ## family: "gaussian","binomial", etc. ## nsize: the number of conditioning variables that will be used. ## lambda: Fantope projection tuning parameter ## ## Outputs: ## coef: slope coefficients ## rank: rank of coef ## Cset: selected conditioning set ## #---------------------------------- #the beginning of the code Screen3S=function(x,y,family,nsize,lambda){ n=dim(x)[1];p=dim(x)[2] fullset=1:p x<- apply(x,2, function(x) (x-mean(x))/sd(x)) ##Step1) Pre-screening screening ## find the q variables with the largest magnitude q=n beta0=rep(0,p) for(j in 1:p){ Xtemp=cbind(x[,j]); glm=glm(y~Xtemp,family=family) beta0[j]=abs(glm$coeff)[2]; } subset=rep(0,q) sort(beta0,TRUE) ->aa for ( i in 1:q){ subset[i]=which(beta0==aa[i]) } #Step 2) Fantope projection subset=sort(subset) out <- fps(cor(x[,subset]), ndim = nsize) #cvout <- cv(out, x[,subset], FUN = cor, verbose = 1) #v1 <- coef(out, lambda=cvout$lambda.cv) v <- coef(out, lambda=lambda) # select the conditioning set maxi=function(x){max(x,na.rm=TRUE)} vv=apply(abs(v),1,maxi) id=NULL for(jj in 1:nsize){ id1=which(abs(vv)==sort(abs(vv), TRUE)[jj]) id=c(id, id1) } Cset=subset[unique(id)[1:nsize]] ## Step3) Perform CSIS scanset=fullset[-Cset]; cSize=length(Cset); beta=rep(0,p) for(j in scanset){ Xtemp=cbind(x[,Cset],x[,j]); glm=glm(y~Xtemp,family=family) beta[j]=abs(glm$coeff)[cSize+2]; } ##replace beta as NA if variables are selected as Cset in Step 2 beta[Cset]= NA list(coef=beta, rank=p+1-length(Cset)-rank(beta,na.last="keep"), Cset=Cset) } #-----------------------# ## the end of the code ## Reference ## [1] Hong, Wang, and He (2015) ## [2] Barut, Fan, Verhasselt (2012) Conditional sure independence ## screening. manuscript ## [3] Vu, Cho, Lei, Rohe (2013) Fantope projection and selection: ## a near-optimal convex relaxation of sparse PCA. Advances ## in Neural Information Processing Systems 26, Curran Associates,Inc., ## 2670-2678 #------------------------------------ ## Example 1 in the manuscript #----------------------------------- ## load library library(fps) # To use Fantope projection # data generation n=100; p=1000 b1<- 3;b2<- 3;b3<-3;b4<-3;b5<-3; b6<- -7.5 truerho=0.5 corrmat=diag(rep(1-truerho,p))+matrix(truerho, p,p) cholmat=chol(corrmat) X0=rnorm(n*p,mean=0,sd=1) x=matrix(X0,n,p) x=x%*%cholmat y=x[,1:6]%*%rbind(b1,b2,b3,b4,b5,b6)+rnorm(n,sd=1) ## apply the proposed method (3S) result= Screen3S(x,y,family=gaussian,nsize=2,lambda=0.5) result$rank[1:6] #the slope coefficients of the first six variables ## Note: ## nsize: the number of variables chosen to be the conditioning set ## lambda: Fantope projection tuning parameter, the cross-validation can ## be performed to find the good lambda