### R code for the following paper:
### Cui, Y.H. and W.Z. Yang. (2009) Zero-inflated generalized poisson regression mixture model for mapping quantitative trait loci underlying count trait with many zeros. Journal of Theoretical Biology 256: 276-285. 
### A sample data set is also provided
 
 
library(RODBC)
xlsConnect1<-odbcConnectExcel("phenotype.xls")     #phenotype file#
phenoy<-sqlFetch(xlsConnect1, "phenotype")
Pyy<-phenoy$y

odbcClose(xlsConnect1)
xlsConnect2<-odbcConnectExcel("mg2.xls")           #marker file#
marker<-sqlFetch(xlsConnect2,"markergen")
odbcClose(xlsConnect2)  

xlsConnect3<-odbcConnectExcel("markerdistant.xls")  #linkage map file#
dist<-sqlFetch(xlsConnect3,"gendist")
odbcClose(xlsConnect3)

##################################
#     all functions needed       #
##################################

f.r<-function(d) (1-exp(-2*d/100))/2

Cpf<-function(r1,r2,r)
	{
	    p1=c((1-r1)^2*(1-r2)^2, 2*r1*r2*(1-r1)*(1-r2),r1^2*r2^2)/(1-r)^2;
	    p2=c((1-r1)^2*r2*(1-r2), r1*(1-r1)*(1-2*r2+2*r2^2),r1^2*r2*(1-r2))/(r*(1-r));
	    p3=c((1-r1)^2*r2^2, 2*r1*r2*(1-r1)*(1-r2),r1^2*(1-r2)^2)/r^2;
	    p4=c(r1*(1-r1)*(1-r2)^2, r2*(1-r2)*(1-2*r1+2*r1^2),r1*(1-r1)*r2^2)/(r*(1-r));
	    p5=c(2*r1*r2*(1-r1)*(1-r2), (1-2*r1+2*r1^2)*(1-2*r2+2*r2^2),2*r1*r2*(1-r1)*(1-r2))/(1-2*r+2*r^2);
	    p6=c(r1*(1-r1)*r2^2, r2*(1-r2)*(1-2*r1+2*r1^2),r1*(1-r1)*(1-r2)^2)/(r*(1-r));
	    p7=c(r1^2*(1-r2)^2, 2*r1*r2*(1-r1)*(1-r2),(1-r1)^2*r2^2)/r^2;
	    p8=c(r1^2*r2*(1-r2), r1*(1-r1)*(1-2*r2+2*r2^2),r2*(1-r1)^2*(1-r2))/(r*(1-r));
	    p9=c(r1^2*r2^2, 2*r1*r2*(1-r1)*(1-r2),(1-r1)^2*(1-r2)^2)/(1-r)^2;
    
	    p=rbind(p1,p2,p3,p4,p5,p6,p7,p8,p9)
	    return(p)
	}

sphi.fun<-function(mm1,mm2,n)     #the conditional probability based on two marker genotype
{
 	sphi=matrix(0,n,3)
 	for(i in 1:n)
  	{ 
  	if(M[i,mm1]==2 & M[i,mm2]==2)  {sphi[i,]=rcm[1,]}
  	if(M[i,mm1]==2 & M[i,mm2]==1)  {sphi[i,]=rcm[2,]}
  	if(M[i,mm1]==2 & M[i,mm2]==0)  {sphi[i,]=rcm[3,]}
  	if(M[i,mm1]==1 & M[i,mm2]==2)  {sphi[i,]=rcm[4,]}
  	if(M[i,mm1]==1 & M[i,mm2]==1)  {sphi[i,]=rcm[5,]}
  	if(M[i,mm1]==1 & M[i,mm2]==0)  {sphi[i,]=rcm[6,]}
  	if(M[i,mm1]==0 & M[i,mm2]==2)  {sphi[i,]=rcm[7,]}
  	if(M[i,mm1]==0 & M[i,mm2]==1)  {sphi[i,]=rcm[8,]}
  	if(M[i,mm1]==0 & M[i,mm2]==0)  {sphi[i,]=rcm[9,]}
  	} 
 	return(sphi) 
}

f<-function(lambda,phi,yy) {pf<-((lambda/(1+phi*lambda))^yy)*(((1+phi*yy)^(yy-1))/factorial(yy))*exp(((-1)*lambda*(1+phi*yy))/(1+phi*lambda));return(pf)}

fpp.fun<-function(tau,phi,mu,a,d)    	#to get the conditional probability based on the phenotype and xi's
{ 
      n<-length(y)  
  	fpp<-matrix(0,3,n)
      ww<-matrix(0,3,1)
    	ww[1]=1-1/(1+(exp(mu+a))^(-1*tau))
    	ww[2]=1-1/(1+(exp(mu+d))^(-1*tau))
    	ww[3]=1-1/(1+(exp(mu-a))^(-1*tau))                	#three sets of expression when QQ,Qq and qq
  	for (i in 1:n)
  	{        
    		if(y[i]==0)  
     		{ 
      	fpp[1,i]=ww[1]+(1-ww[1])*f(exp(mu+a),phi,y[i])
      	fpp[2,i]=ww[2]+(1-ww[2])*f(exp(mu+d),phi,y[i])  
      	fpp[3,i]=ww[3]+(1-ww[3])*f(exp(mu-a),phi,y[i])
     		}
    		else if(y[i]!=0) 
     		{ 
      	fpp[1,i]=(1-ww[1])*f(exp(mu+a),phi,y[i])    
      	fpp[2,i]=(1-ww[2])*f(exp(mu+d),phi,y[i])
      	fpp[3,i]=(1-ww[3])*f(exp(mu-a),phi,y[i])
     		}
  	}
  	return(fpp)
}

fLogL0<-function(pp)
{
     stopifnot(is.numeric(pp))
     if(length(pp)!=3) stop("pp must be vector of 3")
     tau=pp[1];phi=pp[2];mu=pp[3]

	T=1+phi*sort(y); YY=sort(y)
	s=which(T<=0)
	
	lmd=exp(mu)
  	if(1+phi*lmd<=0 | length(s)>=1) phi=max(-1/(YY[s]+0.001),-1/(lmd+0.001))
        
     fpp=fpp.fun(tau,phi,mu,0,0)     
     return(-sum(log(fpp[1,])))
}

fLogL<-function(p)
{
     stopifnot(is.numeric(p))
     if(length(p)!=5) stop("p must be vector of length 5")
     tau=p[1];phi=p[2];mu=p[3]
     a=p[4];d=p[5]  
	T=1+phi*sort(y); YY=sort(y)
	s=which(T<=0)
	
	lmd1=exp(mu+a)
	lmd2=exp(mu+d)
	lmd3=exp(mu-a)
	S=sort(c(lmd1,lmd2,lmd3))
	SS=1+phi*S
	SSi=which(SS<=0)
  	if(length(s)>=1 | length(SSi)>=1) phi=max(-1/(YY[s]+0.001),-1/(S[SSi]+0.001))

	#if(length(SSi)>=1) phi=-1/(S[SSi]+0.001)
     #if(1+phi*min(y)<=0) phi=-1/max(y)

     fpp=fpp.fun(tau,phi,mu,a,d) 
     ll=-sum(diag(cphi%*%log(fpp+10^(-15))))-sum(cphi*log(sphi+10^(-15)))    
     return(ll)
} 

#####################################################
# hypothesis test on moving 2CM once a time         #
# the total number of the markers on 19 chromosomes #
#####################################################
int=2                                #scan interval#
                               
Dist<-matrix(0,length(marker)-1,1)  # get all the distances between markers 

for (i in 1:(length(marker)-1)){if(dist[2,i+1]-dist[2,i]>0) {Dist[i]=dist[2,i+1]-dist[2,i]}}

D0=c(which(Dist==0),length(Dist))

nc=matrix(0,19)                 #the number of markers'on each chromosome# 
             
for (i in 2:19) {nc[1]=D0[1];nc[i]=D0[i]-D0[i-1]};

Nd=c();
LR=c();
rep=1; ### rep=1000 for permutation 
mLLR=matrix(0,rep,19);
wmax=matrix(0,rep,19);
mpara=matrix(0,rep,5);

for (h in 1:rep)      #permutation#
   { 
      ik=0
      par00=c(0.9,0.5,0.8)
      par0=c(0.9,0.5,0.8,0.1,-0.4)

      if (rep!=1)
      {
	Py=sample(Pyy, size=length(Pyy), replace = FALSE, prob = NULL)    ### For permutation
      }
      if (rep==1)
      {
	Py=Pyy  ### orignal data analysis
      }

      for (k in 1:2)  #scan 19 chromosomes#
      { 
      		# Get d, Dis, and markers'genotype for kth chromosome#
            	d1=as.numeric(dist[2,(1+ik):(ik+nc[k])]-dist[2,ik+1]);d2=c((0:(max(d1)/int))*int)
            	d=sort(unique(c(d1,d2)))
      		N=length(d);id=c(0,0)
      		LLR<-matrix(0,N,1);pa<-matrix(0,N,5);LogL1<-matrix(0,N,1)     
      		markerk<-cbind(marker[,(ik+1):sum(nc[1:k])])  
            	for (j in 1:N)  
 		   {    
                  	dr.1=d[j]
                  	n1=length(d1)-1          
	   			for (jj in 1:n1)
    				   {
       					if (dr.1 >= d1[jj] && dr.1 < d1[(jj+1)]){ loc1=jj;id[1]=jj}
    				   }
                  	r=f.r(d1[loc1+1]-d1[loc1])
			r1=f.r(dr.1-d1[loc1])
      			r2=f.r(d1[(loc1+1)]-dr.1)
                  	Dat=data.frame(Py,markerk[,loc1],markerk[,loc1+1])
         		rcm<-Cpf(r1,r2,r)          	
         		Datnew=na.omit(Dat)                          	    
 	   		rcm<-Cpf(r1,r2,r)          	
         		Datnew=na.omit(Dat)  
         		y<-Datnew[,1];M<-cbind(Datnew[,2],Datnew[,3]);n<-length(y)
         		### H0 ###
                  	if(id[1]!=id[2]) 
			{ 
				out0=nlm(fLogL0,par00,fscale=length(y));LL0=-out0$minimum;par00=out0$estimate   
			}

                  	id[2]=id[1]       
	   		### H1 ###        
         		sphi<-sphi.fun(1,2,n)     	  	 
         		LL <- c(0,2) 
         		cvalue=10^(-7)  
         		while(abs(LL[2]-LL[1])>cvalue)
  	   		{
    				LL[1]<-LL[2]
    				### E-step ###
    				cphi<-matrix(0,length(y),3)           # this is the posterior probability of QTL genotype 
    				fpp<-fpp.fun(par0[1],par0[2],par0[3],par0[4],par0[5])                
    				for (i in 1:n)
    				{ 
     					deno=sphi[i,]%*%fpp[,i]
                  			cphi[i,1]<-(sphi[i,1]*fpp[1,i])/deno
     					cphi[i,2]<-(sphi[i,2]*fpp[2,i])/deno
     					cphi[i,3]<-(sphi[i,3]*fpp[3,i])/deno
    				}
    				### M-step ###         
            			out=nlm(fLogL,par0,iterlim=300,fscale=length(y))  ### nlm
            			par0=out$estimate       
            			LL[2]=sum(log(diag(sphi%*%fpp.fun(par0[1],par0[2],par0[3],par0[4],par0[5]))))
         		}                                                 
  	   		LogL1[j]<-LL[2]
         		LLR[j]=-2*(LL0-LogL1[j])
                  	LR=append(LR,LLR[j],after=length(LR)) 
         		pa[j,]=par0
                 	#plot(LLR)     
		}#end of j
            ik=sum(nc[1:k])
		mLLR[h,k]=max(LLR)
		wmax[h,k]=which.max(LLR)
            mpara[h,]=pa[wmax[h,k],]
            cat(k,"\n")
    	} #end of k 
     	cat(h,"\n")
}# end of h
plot(LLR)
 
########################################
### stop here for real data analysis ###
########################################

sLLR<-sLR[1:180,]
mmLR<-c(rep(0,180))
mLLr<-c(rep(0,19))
for(i in 1:19)
{
 mLr<-sort(sLLR[,i],decreasing=FALSE)
 mLLr[i]<-mLr[171]
}
mLLr               #chromosome-wise cutoff#
for(i in 1:180)
{
  mmLR[i]<-max(sLLR[i,])
}
oLR<-sort(mmLR,na.last=TRUE,decreasing=FALSE)
oLR[171]        #genome-wise cutoff#