### R code for the following paper: ### Cui, Y.H. and W.Z. Yang. (2009) Zero-inflated generalized poisson regression mixture model for mapping quantitative trait loci underlying count trait with many zeros. Journal of Theoretical Biology 256: 276-285. ### A sample data set is also provided library(RODBC) xlsConnect1<-odbcConnectExcel("phenotype.xls") #phenotype file# phenoy<-sqlFetch(xlsConnect1, "phenotype") Pyy<-phenoy$y odbcClose(xlsConnect1) xlsConnect2<-odbcConnectExcel("mg2.xls") #marker file# marker<-sqlFetch(xlsConnect2,"markergen") odbcClose(xlsConnect2) xlsConnect3<-odbcConnectExcel("markerdistant.xls") #linkage map file# dist<-sqlFetch(xlsConnect3,"gendist") odbcClose(xlsConnect3) ################################## # all functions needed # ################################## f.r<-function(d) (1-exp(-2*d/100))/2 Cpf<-function(r1,r2,r) { p1=c((1-r1)^2*(1-r2)^2, 2*r1*r2*(1-r1)*(1-r2),r1^2*r2^2)/(1-r)^2; p2=c((1-r1)^2*r2*(1-r2), r1*(1-r1)*(1-2*r2+2*r2^2),r1^2*r2*(1-r2))/(r*(1-r)); p3=c((1-r1)^2*r2^2, 2*r1*r2*(1-r1)*(1-r2),r1^2*(1-r2)^2)/r^2; p4=c(r1*(1-r1)*(1-r2)^2, r2*(1-r2)*(1-2*r1+2*r1^2),r1*(1-r1)*r2^2)/(r*(1-r)); p5=c(2*r1*r2*(1-r1)*(1-r2), (1-2*r1+2*r1^2)*(1-2*r2+2*r2^2),2*r1*r2*(1-r1)*(1-r2))/(1-2*r+2*r^2); p6=c(r1*(1-r1)*r2^2, r2*(1-r2)*(1-2*r1+2*r1^2),r1*(1-r1)*(1-r2)^2)/(r*(1-r)); p7=c(r1^2*(1-r2)^2, 2*r1*r2*(1-r1)*(1-r2),(1-r1)^2*r2^2)/r^2; p8=c(r1^2*r2*(1-r2), r1*(1-r1)*(1-2*r2+2*r2^2),r2*(1-r1)^2*(1-r2))/(r*(1-r)); p9=c(r1^2*r2^2, 2*r1*r2*(1-r1)*(1-r2),(1-r1)^2*(1-r2)^2)/(1-r)^2; p=rbind(p1,p2,p3,p4,p5,p6,p7,p8,p9) return(p) } sphi.fun<-function(mm1,mm2,n) #the conditional probability based on two marker genotype { sphi=matrix(0,n,3) for(i in 1:n) { if(M[i,mm1]==2 & M[i,mm2]==2) {sphi[i,]=rcm[1,]} if(M[i,mm1]==2 & M[i,mm2]==1) {sphi[i,]=rcm[2,]} if(M[i,mm1]==2 & M[i,mm2]==0) {sphi[i,]=rcm[3,]} if(M[i,mm1]==1 & M[i,mm2]==2) {sphi[i,]=rcm[4,]} if(M[i,mm1]==1 & M[i,mm2]==1) {sphi[i,]=rcm[5,]} if(M[i,mm1]==1 & M[i,mm2]==0) {sphi[i,]=rcm[6,]} if(M[i,mm1]==0 & M[i,mm2]==2) {sphi[i,]=rcm[7,]} if(M[i,mm1]==0 & M[i,mm2]==1) {sphi[i,]=rcm[8,]} if(M[i,mm1]==0 & M[i,mm2]==0) {sphi[i,]=rcm[9,]} } return(sphi) } f<-function(lambda,phi,yy) {pf<-((lambda/(1+phi*lambda))^yy)*(((1+phi*yy)^(yy-1))/factorial(yy))*exp(((-1)*lambda*(1+phi*yy))/(1+phi*lambda));return(pf)} fpp.fun<-function(tau,phi,mu,a,d) #to get the conditional probability based on the phenotype and xi's { n<-length(y) fpp<-matrix(0,3,n) ww<-matrix(0,3,1) ww[1]=1-1/(1+(exp(mu+a))^(-1*tau)) ww[2]=1-1/(1+(exp(mu+d))^(-1*tau)) ww[3]=1-1/(1+(exp(mu-a))^(-1*tau)) #three sets of expression when QQ,Qq and qq for (i in 1:n) { if(y[i]==0) { fpp[1,i]=ww[1]+(1-ww[1])*f(exp(mu+a),phi,y[i]) fpp[2,i]=ww[2]+(1-ww[2])*f(exp(mu+d),phi,y[i]) fpp[3,i]=ww[3]+(1-ww[3])*f(exp(mu-a),phi,y[i]) } else if(y[i]!=0) { fpp[1,i]=(1-ww[1])*f(exp(mu+a),phi,y[i]) fpp[2,i]=(1-ww[2])*f(exp(mu+d),phi,y[i]) fpp[3,i]=(1-ww[3])*f(exp(mu-a),phi,y[i]) } } return(fpp) } fLogL0<-function(pp) { stopifnot(is.numeric(pp)) if(length(pp)!=3) stop("pp must be vector of 3") tau=pp[1];phi=pp[2];mu=pp[3] T=1+phi*sort(y); YY=sort(y) s=which(T<=0) lmd=exp(mu) if(1+phi*lmd<=0 | length(s)>=1) phi=max(-1/(YY[s]+0.001),-1/(lmd+0.001)) fpp=fpp.fun(tau,phi,mu,0,0) return(-sum(log(fpp[1,]))) } fLogL<-function(p) { stopifnot(is.numeric(p)) if(length(p)!=5) stop("p must be vector of length 5") tau=p[1];phi=p[2];mu=p[3] a=p[4];d=p[5] T=1+phi*sort(y); YY=sort(y) s=which(T<=0) lmd1=exp(mu+a) lmd2=exp(mu+d) lmd3=exp(mu-a) S=sort(c(lmd1,lmd2,lmd3)) SS=1+phi*S SSi=which(SS<=0) if(length(s)>=1 | length(SSi)>=1) phi=max(-1/(YY[s]+0.001),-1/(S[SSi]+0.001)) #if(length(SSi)>=1) phi=-1/(S[SSi]+0.001) #if(1+phi*min(y)<=0) phi=-1/max(y) fpp=fpp.fun(tau,phi,mu,a,d) ll=-sum(diag(cphi%*%log(fpp+10^(-15))))-sum(cphi*log(sphi+10^(-15))) return(ll) } ##################################################### # hypothesis test on moving 2CM once a time # # the total number of the markers on 19 chromosomes # ##################################################### int=2 #scan interval# Dist<-matrix(0,length(marker)-1,1) # get all the distances between markers for (i in 1:(length(marker)-1)){if(dist[2,i+1]-dist[2,i]>0) {Dist[i]=dist[2,i+1]-dist[2,i]}} D0=c(which(Dist==0),length(Dist)) nc=matrix(0,19) #the number of markers'on each chromosome# for (i in 2:19) {nc[1]=D0[1];nc[i]=D0[i]-D0[i-1]}; Nd=c(); LR=c(); rep=1; ### rep=1000 for permutation mLLR=matrix(0,rep,19); wmax=matrix(0,rep,19); mpara=matrix(0,rep,5); for (h in 1:rep) #permutation# { ik=0 par00=c(0.9,0.5,0.8) par0=c(0.9,0.5,0.8,0.1,-0.4) if (rep!=1) { Py=sample(Pyy, size=length(Pyy), replace = FALSE, prob = NULL) ### For permutation } if (rep==1) { Py=Pyy ### orignal data analysis } for (k in 1:2) #scan 19 chromosomes# { # Get d, Dis, and markers'genotype for kth chromosome# d1=as.numeric(dist[2,(1+ik):(ik+nc[k])]-dist[2,ik+1]);d2=c((0:(max(d1)/int))*int) d=sort(unique(c(d1,d2))) N=length(d);id=c(0,0) LLR<-matrix(0,N,1);pa<-matrix(0,N,5);LogL1<-matrix(0,N,1) markerk<-cbind(marker[,(ik+1):sum(nc[1:k])]) for (j in 1:N) { dr.1=d[j] n1=length(d1)-1 for (jj in 1:n1) { if (dr.1 >= d1[jj] && dr.1 < d1[(jj+1)]){ loc1=jj;id[1]=jj} } r=f.r(d1[loc1+1]-d1[loc1]) r1=f.r(dr.1-d1[loc1]) r2=f.r(d1[(loc1+1)]-dr.1) Dat=data.frame(Py,markerk[,loc1],markerk[,loc1+1]) rcm<-Cpf(r1,r2,r) Datnew=na.omit(Dat) rcm<-Cpf(r1,r2,r) Datnew=na.omit(Dat) y<-Datnew[,1];M<-cbind(Datnew[,2],Datnew[,3]);n<-length(y) ### H0 ### if(id[1]!=id[2]) { out0=nlm(fLogL0,par00,fscale=length(y));LL0=-out0$minimum;par00=out0$estimate } id[2]=id[1] ### H1 ### sphi<-sphi.fun(1,2,n) LL <- c(0,2) cvalue=10^(-7) while(abs(LL[2]-LL[1])>cvalue) { LL[1]<-LL[2] ### E-step ### cphi<-matrix(0,length(y),3) # this is the posterior probability of QTL genotype fpp<-fpp.fun(par0[1],par0[2],par0[3],par0[4],par0[5]) for (i in 1:n) { deno=sphi[i,]%*%fpp[,i] cphi[i,1]<-(sphi[i,1]*fpp[1,i])/deno cphi[i,2]<-(sphi[i,2]*fpp[2,i])/deno cphi[i,3]<-(sphi[i,3]*fpp[3,i])/deno } ### M-step ### out=nlm(fLogL,par0,iterlim=300,fscale=length(y)) ### nlm par0=out$estimate LL[2]=sum(log(diag(sphi%*%fpp.fun(par0[1],par0[2],par0[3],par0[4],par0[5])))) } LogL1[j]<-LL[2] LLR[j]=-2*(LL0-LogL1[j]) LR=append(LR,LLR[j],after=length(LR)) pa[j,]=par0 #plot(LLR) }#end of j ik=sum(nc[1:k]) mLLR[h,k]=max(LLR) wmax[h,k]=which.max(LLR) mpara[h,]=pa[wmax[h,k],] cat(k,"\n") } #end of k cat(h,"\n") }# end of h plot(LLR) ######################################## ### stop here for real data analysis ### ######################################## sLLR<-sLR[1:180,] mmLR<-c(rep(0,180)) mLLr<-c(rep(0,19)) for(i in 1:19) { mLr<-sort(sLLR[,i],decreasing=FALSE) mLLr[i]<-mLr[171] } mLLr #chromosome-wise cutoff# for(i in 1:180) { mmLR[i]<-max(sLLR[i,]) } oLR<-sort(mmLR,na.last=TRUE,decreasing=FALSE) oLR[171] #genome-wise cutoff#