# Created by Yuehua Cui on 12/11/07. # Copyright 2007 Michigan State University. All rights reserved. # This code is designed for the paper in Cui et al. (2006) Genomics # Testing imprinting in an F2 line cross population # Assumption: d_female=1.25*d_male (true for mouse), rm(list=objects()) #rm(list=ls()) ########################## ### experiment design ### ### AaxAa(F2) ### ### AA=1; Aa=0; aa=-1 ### ### missing data=9 ### ########################## cvalue=1e-07 ### EM convergence value interval=2 ### linkage scan increment ### load data ### MM=read.table("c:/marker.txt") ### Marker data y1=read.table("c:/y.txt") ### phenotype data distance=read.table("c:/distance.txt") ### a column vector contains the genetic distance for all chromosomes, start with 0, for example, (0,5.6,21.3,0,8.7,10.3,8.7,0,1.3,....) dr=t(distance) pos=scan("c:/mp.txt") ### a vector contains the number of markers for each chromosome, for example (3,4,...) mp=cumsum(pos) chr1=dr[1:mp[1]]; chr2=dr[(mp[1]+1):mp[2]];chr3=dr[(mp[2]+1):mp[3]]; chr4=dr[(mp[3]+1):mp[4]] chr5=dr[(mp[4]+1):mp[5]];chr6=dr[(mp[5]+1):mp[6]];chr7=dr[(mp[6]+1):mp[7]] chr8=dr[(mp[7]+1):mp[8]];chr9=dr[(mp[8]+1):mp[9]];chr10=dr[(mp[9]+1):mp[10]] chr11=dr[(mp[10]+1):mp[11]];chr12=dr[(mp[11]+1):mp[12]];chr13=dr[(mp[12]+1):mp[13]] chr14=dr[(mp[13]+1):mp[14]];chr15=dr[(mp[14]+1):mp[15]] chr16=dr[(mp[15]+1):mp[16]];chr17=dr[(mp[16]+1):mp[17]] chr18=dr[(mp[17]+1):mp[18]];chr19=dr[(mp[18]+1):mp[19]] CHR1=cumsum(chr1);CHR2=cumsum(chr2); CHR3=cumsum(chr3); CHR4=cumsum(chr4); CHR5=cumsum(chr5); CHR6=cumsum(chr6); CHR7=cumsum(chr7) CHR8=cumsum(chr8);CHR9=cumsum(chr9); CHR10=cumsum(chr10); CHR11=cumsum(chr11); CHR12=cumsum(chr12); CHR13=cumsum(chr13); CHR14=cumsum(chr14) CHR15=cumsum(chr15);CHR16=cumsum(chr16); CHR17=cumsum(chr17); CHR18=cumsum(chr18); CHR19=cumsum(chr19) dd1=c((0:(max(CHR1)/interval))*interval); dd2=c((0:(max(CHR2)/interval))*interval) dd3=c((0:(max(CHR3)/interval))*interval); dd4=c((0:(max(CHR4)/interval))*interval) dd5=c((0:(max(CHR5)/interval))*interval); dd6=c((0:(max(CHR6)/interval))*interval) dd7=c((0:(max(CHR7)/interval))*interval); dd8=c((0:(max(CHR8)/interval))*interval) dd9=c((0:(max(CHR9)/interval))*interval); dd10=c((0:(max(CHR10)/interval))*interval) dd11=c((0:(max(CHR11)/interval))*interval);dd12=c((0:(max(CHR12)/interval))*interval) dd13=c((0:(max(CHR13)/interval))*interval);dd14=c((0:(max(CHR14)/interval))*interval) dd15=c((0:(max(CHR15)/interval))*interval);dd16=c((0:(max(CHR16)/interval))*interval) dd17=c((0:(max(CHR17)/interval))*interval);dd18=c((0:(max(CHR18)/interval))*interval) dd19=c((0:(max(CHR19)/interval))*interval) d1=sort(unique(c(CHR1[2:mp[1]],dd1))); n1=length(d1) d2=sort(unique(c(CHR2[2:mp[2]],dd2))); n2=length(d2) d3=sort(unique(c(CHR3[2:mp[3]],dd3))); n3=length(d3) d4=sort(unique(c(CHR4[2:mp[4]],dd4))); n4=length(d4) d5=sort(unique(c(CHR5[2:mp[5]],dd5))); n5=length(d5) d6=sort(unique(c(CHR6[2:mp[6]],dd6))); n6=length(d6) d7=sort(unique(c(CHR7[2:mp[7]],dd7))); n7=length(d7) d8=sort(unique(c(CHR8[2:mp[8]],dd8))); n8=length(d8) d9=sort(unique(c(CHR9[2:mp[9]],dd9))); n9=length(d9) d10=sort(unique(c(CHR10[2:mp[10]],dd10))); n10=length(d10) d11=sort(unique(c(CHR11[2:mp[11]],dd11))); n11=length(d11) d12=sort(unique(c(CHR12[2:mp[12]],dd12))); n12=length(d12) d13=sort(unique(c(CHR13[2:mp[13]],dd13))); n13=length(d13) d14=sort(unique(c(CHR14[2:mp[14]],dd14))); n14=length(d14) d15=sort(unique(c(CHR15[2:mp[15]],dd15))); n15=length(d15) d16=sort(unique(c(CHR16[2:mp[16]],dd16))); n16=length(d16) d17=sort(unique(c(CHR17[2:mp[17]],dd17))); n17=length(d17) d18=sort(unique(c(CHR18[2:mp[18]],dd18))); n18=length(d18) d19=sort(unique(c(CHR19[2:mp[19]],dd19))); n19=length(d19) ### Get the marker ### M1=MM[,1:mp[1]]; M2=MM[,(mp[1]+1):mp[2]];M3=MM[,(mp[2]+1):mp[3]]; M4=MM[,(mp[3]+1):mp[4]];M5=MM[,(mp[4]+1):mp[5]];M6=MM[,(mp[5]+1):mp[6]];M7=MM[,(mp[6]+1):mp[7]] M8=MM[,(mp[7]+1):mp[8]];M9=MM[,(mp[8]+1):mp[9]];M10=MM[,(mp[9]+1):mp[10]] M11=MM[,(mp[10]+1):mp[11]];M12=MM[,(mp[11]+1):mp[12]];M13=MM[,(mp[12]+1):mp[13]] M14=MM[,(mp[13]+1):mp[14]];M15=MM[,(mp[14]+1):mp[15]] M16=MM[,(mp[15]+1):mp[16]];M17=MM[,(mp[16]+1):mp[17]] M18=MM[,(mp[17]+1):mp[18]];M19=MM[,(mp[18]+1):mp[19]] fr <- function(d) (1-exp(-2*d/100))/2 ### Haldane map function ### The conditional prob. of QTL given on markers fp=function(r.m1,r.m2,r.p1,r.p2,rm,rp) { rm00=(1-r.m1)*(1-r.m2); rp00=(1-r.p1)*(1-r.p2) rm01=(1-r.m1)*r.m2; rp01=(1-r.p1)*r.p2 rm10=r.m1*(1-r.m2); rp10=r.p1*(1-r.p2) rm11=r.m1*r.m2; rp11=r.p1*r.p2 p11=rm00*rp00/((1-rm)*(1-rp));p12=rm00*rp11/((1-rm)*(1-rp));p13=rm11*rp00/((1-rm)*(1-rp));p14=rm11*rp11/((1-rm)*(1-rp)) p21=(rm01*rp00+rm00*rp01)/(rm*(1-rp)+(1-rm)*rp);p22=(rm01*rp11+rm00*rp10)/(rm*(1-rp)+(1-rm)*rp);p23=(rm10*rp00+rm11*rp01)/(rm*(1-rp)+(1-rm)*rp);p24=(rm10*rp11+rm11*rp10)/(rm*(1-rp)+(1-rm)*rp) p31=rm01*rp01/(rm*rp);p32=rm01*rp10/(rm*rp);p33=rm10*rp01/(rm*rp);p34=rm10*rp10/(rm*rp) p41=(rm00*rp10+rm10*rp00)/((1-rm)*rp+rm*(1-rp));p42=(rm00*rp01+rm10*rp11)/((1-rm)*rp+rm*(1-rp));p43=(rm11*rp10+rm01*rp00)/((1-rm)*rp+rm*(1-rp)); p44=(rm11*rp01+rm01*rp11)/((1-rm)*rp+rm*(1-rp)) p51=(rm00*rp11+rm01*rp10+rm10*rp01+rm11*rp00)/(2*(1-rm)*(1-rp)+2*rm*rp);p52=(rm00*rp00+rm01*rp01+rm10*rp10+rm11*rp11)/(2*(1-rm)*(1-rp)+2*rm*rp);p53=(rm11*rp11+rm10*rp10+rm01*rp01+rm00*rp00)/(2*(1-rm)*(1-rp)+2*rm*rp);p54=(rm11*rp00+rm10*rp01+rm01*rp10+rm00*rp11)/(2*(1-rm)*(1-rp)+2*rm*rp) p61=(rm11*rp01+rm01*rp11)/((1-rm)*rp+rm*(1-rp));p62=(rm11*rp10+rm01*rp00)/((1-rm)*rp+rm*(1-rp));p63=(rm00*rp01+rm10*rp11)/((1-rm)*rp+rm*(1-rp));p64=(rm00*rp10+rm10*rp00)/((1-rm)*rp+rm*(1-rp)) p71=rm10*rp10/(rm*rp);p72=rm10*rp01/(rm*rp);p73=rm01*rp10/(rm*rp);p74=rm01*rp01/(rm*rp) p81=(rm10*rp11+rm11*rp10)/(rm*(1-rp)+(1-rm)*rp);p82=(rm10*rp00+rm11*rp01)/(rm*(1-rp)+(1-rm)*rp);p83=(rm01*rp11+rm00*rp10)/(rm*(1-rp)+(1-rm)*rp);p84=(rm01*rp00+rm00*rp01)/(rm*(1-rp)+(1-rm)*rp) p91=rm11*rp11/((1-rm)*(1-rp));p92=rm11*rp00/((1-rm)*(1-rp));p93=rm00*rp11/((1-rm)*(1-rp));p94=rm00*rp00/((1-rm)*(1-rp)) p = rbind(c(p11,p12,p13,p14),c(p21,p22,p23,p24),c(p31,p32,p33,p34),c(p41,p42,p43,p44),c(p51,p52,p53,p54),c(p61,p62,p63,p64),c(p71,p72,p73,p74),c(p81,p82,p83,p84),c(p91,p92,p93,p94)) return(p) } ### Function to get the prior probability matrix ### f.sphi=function(M.1,M.2,p,n) { z=matrix(0,n,4) for (i in 1:n) { if(M.1[i]==1 && M.2[i]==1) z[i,]=p[1,] else if(M.1[i]==1 && M.2[i]==0) z[i,]=p[2,] else if(M.1[i]==1 && M.2[i]==-1) z[i,]=p[3,] else if(M.1[i]==0 && M.2[i]==1) z[i,]=p[4,] else if(M.1[i]==0 && M.2[i]==0) z[i,]=p[5,] else if(M.1[i]==0 && M.2[i]==-1) z[i,]=p[6,] else if(M.1[i]==-1 && M.2[i]==1) z[i,]=p[7,] else if(M.1[i]==-1 && M.2[i]==0) z[i,]=p[8,] else if(M.1[i]==-1 && M.2[i]==-1) z[i,]=p[9,] } return(z) } fL=function(dp) { rp=fr(dp); rm=fr(1.25*dp); R=(N1*log((1-rm)*(1-rp))+N2*log(rm^2)+N3*log(rm*(1-rp)+(1-rm)*rp)+N4*log(rm^2+(1-rm)*(1-rp))); return(R) } ### Function to estimate r_m and r_f fcount=function(MM1,MM2) { nn=length(MM1) n1=0; n2=0;n3=0;n4=0;n5=0;n6=0;n7=0;n8=0;n9=0 for (i in 1:nn) { if(MM1[i]!=9 && MM2[i]!=9) { if(MM1[i]==1 && MM2[i]==1) n1=n1+1 else if(MM1[i]==1 && MM2[i]==0) n2=n2+1 else if(MM1[i]==1 && MM2[i]==-1) n3=n3+1 else if(MM1[i]==0 && MM2[i]==1) n4=n4+1 else if(MM1[i]==0 && MM2[i]==0) n5=n5+1 else if(MM1[i]==0 && MM2[i]==-1) n6=n6+1 else if(MM1[i]==-1 && MM2[i]==1) n7=n7+1 else if(MM1[i]==-1 && MM2[i]==0) n8=n8+1 else if(MM1[i]==-1 && MM2[i]==-1) n9=n9+1 } } return(c(n1,n2,n3,n4,n5,n6,n7,n8,n9)) } scan=sum(c(n1,n2,n3,n4,n5,n6,n7,n8,n9,n10,n11,n12,n13,n14,n15,n16,n17,n18,n19)) ## total number of positions to scan ddr=c(d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14,d15,d16,d17,d18,d19) ## total positions to scan cn=cumsum(c(n1,n2,n3,n4,n5,n6,n7,n8,n9,n10,n11,n12,n13,n14,n15,n16,n17,n18,n19)) mus=matrix(0,scan,5) ### matrix to save the parameters LR=matrix(0,scan,4) ### matrix to save the LR, chromosome and the position InterCheck=c(0,0) for (ww in 1:scan) # nn location to scan { if(ww<=n1){chrp=CHR1;M.1=M1;posi=1}else if(ww>n1 && ww<=cn[2]){chrp=CHR2;M.1=M2;posi=2}else if(ww>cn[2] && ww<=cn[3]){chrp=CHR3;M.1=M3;posi=3}else if(ww>cn[3] && ww<=cn[4]) {chrp=CHR4;M.1=M4;posi=4}else if(ww>cn[4] && ww<=cn[5]) {chrp=CHR5;M.1=M5;posi=5}else if(ww>cn[5] && ww<=cn[6]) {chrp=CHR6;M.1=M6;posi=6}else if(ww>cn[6] && ww<=cn[7]) {chrp=CHR7;M.1=M7;posi=7}else if(ww>cn[7] && ww<=cn[8]) {chrp=CHR8;M.1=M8;posi=8}else if(ww>cn[8] && ww<=cn[9]) {chrp=CHR9;M.1=M9;posi=9}else if(ww>cn[9] && ww<=cn[10]) {chrp=CHR10;M.1=M10;posi=10}else if(ww>cn[10] && ww<=cn[11]) {chrp=CHR11;M.1=M11;posi=11}else if(ww>cn[11] && ww<=cn[12]) {chrp=CHR12;M.1=M12;posi=12}else if(ww>cn[12] && ww<=cn[13]) {chrp=CHR13;M.1=M13;posi=13}else if(ww>cn[13] && ww<=cn[14]) {chrp=CHR14;M.1=M14;posi=14}else if(ww>cn[14] && ww<=cn[15]) {chrp=CHR15;M.1=M15;posi=15}else if(ww>cn[15] && ww<=cn[16]) {chrp=CHR16;M.1=M16;posi=16}else if(ww>cn[16] && ww<=cn[17]) {chrp=CHR17;M.1=M17;posi=17}else if(ww>cn[17] && ww<=cn[18]) {chrp=CHR18;M.1=M18;posi=18}else if(ww>cn[18] && ww<=cn[19]) {chrp=CHR19;M.1=M19;posi=19} dr1=ddr[ww] nl=length(chr)-1 for (jj in 1:nl) { if (dr1 >= chr[jj] & dr1 < chr[jj+1]) { dint=chr[jj+1]-chr[jj] r=fr(dint) r1=fr(dr1-chr[jj]);r2=fr(chr[jj+1]-dr1) loc1=jj } } if (dr1==max(chr)) { dint=chr[nl+1]-chr[nl] r=fr(dint) r1=fr(dr1-chr[nl]);r2=fr(chr[nl+1]-dr1) loc1=nl } InterCheck[2]=loc1 ### Only calculate one null at an interval ### if (InterCheck[2] != InterCheck[1]) { InterCheck[1]=InterCheck[2] y=subset(y1,M.1[,loc1]!=9 & M.1[,(loc1+1)]!=9) ### 9 is missing value y=as.matrix(y) n=length(y) MMg1=as.data.frame(M.1[,loc1:I(loc1+1)]) MMM1=subset(MMg1,MMg1[,1]!=9 & MMg1[,2]!=9) nn=fcount(MMM1[,1],MMM1[,2]) n22=nn[1];n21=nn[2];n20=nn[3];n12=nn[4];n11=nn[5];n10=nn[6];n02=nn[7];n01=nn[8];n00=nn[9] N1=n22+n00; N2=n20+n02; N3=n21+n12+n10+n01; N4=n11 fopt = optim(dint, fL, method = "BFGS",control = list(fnscale=-1, maxit=200000, reltol=1e-9)) # hessian = TRUE, dp=fopt$par; dm=1.25*dp rp=fr(dp); rm=fr(1.25*dp); ### Under the null my=mean(y) ll0=0 sd.null <- sqrt((n-1)*var(y)/n) ll0=sum(log(dnorm(y,my,sd.null))) } ddd1=dr1-chr[loc1]; ddd2=chr[loc1+1]-dr1; rp11=fr(ddd1*dp/(dint));rp22=fr(ddd2*dp/(dint)); rm11=fr(ddd1*dm/(dint));rm22=fr(ddd2*dm/(dint)); pp <- fp(rm11,rm22,rp11,rp22,rm,rp) sphi=f.sphi(MMM1[,1],MMM1[,2],pp,n) ### Estimate the parameter # Give the initial value for the EM algorithm mu.1 <- my+0.5; mu.21 <- my; mu.22=my; mu.3=my-0.5; sigm <- sd.null par.value=as.matrix(c(mu.1,mu.21,mu.22,mu.3)) D=rbind(c(1,1/2,1/2,1/4),c(1,1/2,-1/2,-1/4),c(1,-1/2,1/2,-1/4),c(1,-1/2,-1/2,1/4)) L <- c(1,5) while(abs(L[2]-L[1])>cvalue)# && abs(m.s[2]-m.s[1])>cvalue) { L[1] <- L[2] #************************ # E-step * #************************ cphi <- matrix(0,n,4) # this is the capital phi matrix for (i in 1:n) { deno <- sphi[i,]%*%dnorm(y[i],par.value,sigm) cphi[i,1] <- sphi[i,1]*dnorm(y[i],mu.1,sigm)/deno cphi[i,2] <- sphi[i,2]*dnorm(y[i],mu.21,sigm)/deno cphi[i,3] <- sphi[i,3]*dnorm(y[i],mu.22,sigm)/deno cphi[i,4] <- sphi[i,4]*dnorm(y[i],mu.3,sigm)/deno } #************************ # M-step * #************************ for (j in 1:4) { par.value[j,] <- cphi[,j]%*%y/sum(cphi[,j]) } mu.1 <- par.value[1,];mu.21 <- par.value[2,];mu.22 <- par.value[3,];mu.3 <- par.value[4,] sigm2=(cphi[,1]%*%(y-mu.1)^2+cphi[,2]%*%(y-mu.21)^2+cphi[,3]%*%(y-mu.22)^2+cphi[,4]%*%(y-mu.3)^2)/n sigm <- sqrt(sigm2) ### calculate the log Likelihood function llr=sum(log(sphi[,1]*dnorm(y,mu.1,sigm)+sphi[,2]*dnorm(y,mu.21,sigm)+sphi[,3]*dnorm(y,mu.22,sigm)+sphi[,4]*dnorm(y,mu.3,sigm))) L[2] <- llr #cat(llr,"\n") } mus[ww,]=c(solve(D)%*%par.value,sigm) LR[ww,]=c(ww,posi,dr1,-2*(ll0-llr)) cat(LR[ww,],"\n") } fin=cbind(LR,mus)