# R programming example 4

 

####################------working with T41-----------------------

T41=read.table("D:/T4-1.DAT", header=F)

dim(T41)

hist(T41)

T41=matrix(scan("D:/T4-1.DAT"), ncol=1)

hist(T41)

hist(T41, nclass="fd")

hist(T41, nclass="scott")

hist(T41, nclass="sturges")

hist(T41, nclass=40)

hist(T41, nclass=30)

hist(T41, nclass=20,col="lightgreen", border="purple", xlab="Radiation data (door closed)")

#--------data look like non-normal----------------

 

######################-------play with "normal" points-------------------

set.seed(493)

y=rnorm(30)

z=qqnorm(y); qqline(y, col=2)

cov(z$x, z$y)  #compare with the table given in page 182

y=rnorm(100)

qqnorm(y,main="100 points from N(0,1)",sub="Q-Q plot with line"); qqline(y, col=3)

y=rnorm(1000)

qqnorm(y); qqline(y, col=4)

 

#####################-----check normality of T41------------------------------------

par(mfrow=c(1,2))

z=qqnorm(T41, main="42 radiation data points",sub="\nQ-Q plot with line",ylab="radiation")

#compare this with Figure 4.6

qqline(T41,col="purple")

hist(T41, nclass=20,col="lightgreen", border="purple", xlab="Radiation data (door closed)")

 

cor(z$x,z$y); cor(sort(T41),qnorm((seq(1:42)-0.5)/42))

#check Table 4.2, reject normality at 0.01, 0.05, 0.1 levels

 

y=rnorm(42); qqplot(T41, y, pch=19, col="red");  qqplot(rnorm(42), y, pch=19, col=4)

#---Explain short and long tails------

 

######################----------load mvtnorm package-----------------

plot(rmvnorm(30, sigma=matrix(c(1,.9,.9,2),2)), xlab="", ylab="",xlim=c(-6,6), ylim=c(-6,6),col=2)

plot(rmvnorm(300, sigma=matrix(c(1,.9,.9,2),2)), xlab="", ylab="",xlim=c(-6,6), ylim=c(-6,6),col=3)

 

 

######################------------working on T4.3 ----lumber stiffness-------------------

T43=matrix(scan("D:T4-3.DAT"),ncol=5,byrow=T)

T43

 

#--------------check uni(bi)-variate normality, ---------------

 

qqnorm(T43[,1]); qqline(T43[,1],col="blue")

hist(T43[,1],nclass=20,col="lightgreen", border="purple",xlab="")

#do the same for other columns

 

pairs(T43[,1:4], col="blue", labels=c("x1","x2","x3","x4"), main="Pairwise plot of stiffness measures")

#compare this with Figure 4.11

 

#---------chi-square plot-----

par(mfrow=c(1,1))

plot(qchisq((seq(1:30)-0.5)/30,4),sort(T43d),pch=19,main="Chi-square plot",xlab="Chi-square quantiles",ylab="generalized distances")

abline(0,1,lty=1,cex=1.5,col="red")

#compare with Figure 4.9------------

 

 

# --------------outlier-detection---------------------

 

#------------one-D plots---------------

plot(T43[,1],rep(0,30),pch=18)

plot(T43[,2],rep(0,30),pch=19)

plot(T43[,3],rep(0,30),pch=20)

plot(T43[,4],rep(0,30),pch=21)

hist(T43[,1],nclass=20)

hist(T43[,2],nclass=20)

hist(T43[,3],nclass=20)

hist(T43[,4],nclass=20)

 

#-----------standardize and scatter plot--------------------------

colMeans(T43[,1:4])

 

T43S=t(t(T43[,1:4])-colMeans(T43[,1:4])) #= T43[,1:4]-rep(1/n,dim(T43)[1])%*%rep(1,dim(T43)[1])%*%T43[,1:4]

T43S

 

T43S=t(t(T43S)/sqrt(apply(T43[,1:4],2,var)))

cbind(T43S, T43[,5])    #compare this with Table 4.4

 

pairs(T43S, col="blue")

 

#----------- d^2_j---------

T43xbar=colMeans(T43[,1:4])

T43S=var(T43[,1:4])

T43SI=solve(T43S)

T43d=rep(0,30)

for (i in 1:30){T43d[i]=t(T43[i,1:4]-T43xbar)%*%T43SI%*%(T43[i,1:4]-T43xbar)}

 

T43d #compare with T43[,5]

#compare with qchisq(0.95, 4)=9.487729

 

 

###############--------------NORMAL TRANSFORMATION----------------

xlambda=function(x,lamb)  #box-cox transformation

{

if (lamb != 0) {(x^(lamb)-1)/lamb}

else {log(x)}

}

 

llambda=function(x,lamb)  #l-lambda

{

 n=length(x); m=length(lamb); ll=rep(0,m);

 for (i in 1:m)

 { ll[i]=(lamb[i]-1)*n*mean(log(x)) - n/2*log( (n-1)/n*var(xlambda(x,lamb[i])) ) }

ll }

 

lambda=seq(-4,4,0.2)

plot(lambda,llambda(T41,lambda))

 

lambda=seq(-0.5,1,0.01)

plot(lambda,llambda(T41,lambda))

 

lambda=seq(-0.0,0.5,0.001)

plot(lambda,llambda(T41,lambda))

 

lambda=seq(0.25,0.3,length=30)

plot(lambda,llambda(T41,lambda))

 

lambda=seq(0.27,0.28,length=30)

plot(lambda,llambda(T41,lambda))

#0.276

 

par(mfrow=c(1,2))

qqnorm(T41,main="42 radiation data points",sub="\nQ-Q plot with line",ylab="radiation")

qqline(T41,col="red")

 

T41trans=xlambda(T41,0.276)

qqnorm(T41trans,main="42 transformed radiation data points",sub="\nQ-Q plot with line",ylab="radiation")

qqline(T41trans,col="blue")