Why Compositional Data must be transformed

# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later at the first use.
library(scales)          # alpha() for semi-transparent plot colours
library(robCompositions) # isomLR() for the isometric log-ratio transform

Impact of transforming data to percentages

Independent distributions (no correlation)

Simulate data of independent compound weight per sample:

# Draw 1000 independent normal weights for each of three compounds.
a <- rnorm(1000, mean = 100, sd = 5)
b <- rnorm(1000, mean = 200, sd = 2)
x <- rnorm(1000, mean = 300, sd = 3)
# Show the first rows of the combined sample-by-compound matrix.
head(cbind(a, b, x))
##              a        b        x
## [1,]  99.32111 204.0996 298.4474
## [2,]  99.61334 200.6502 301.3513
## [3,]  95.29100 197.6434 299.5185
## [4,] 103.51921 199.8141 299.6999
## [5,]  99.74045 200.2789 302.0008
## [6,] 103.47327 197.5396 295.9680

Check relationships

# Scatterplot matrix of the raw weights; translucent points reveal density.
pairs(
  cbind(a, b, x),
  pch = 20,
  col = alpha("black", alpha = 0.25)
)

plot of chunk unnamed-chunk-3

# Pairwise correlations of the raw weights, rounded to two decimals.
round(cor(cbind(a, b, x)), digits = 2)
##      a    b    x
## a 1.00 0.00 0.03
## b 0.00 1.00 0.01
## x 0.03 0.01 1.00

Make the data Compositional (aka “closed” or “bounded” data)

# Close the data: express each compound as a percentage of the sample total.
total_weight <- a + b + x
a2 <- 100 * a / total_weight
b2 <- 100 * b / total_weight
x2 <- 100 * x / total_weight

Note how the correlational structure is disrupted: misleading (spurious) correlations are introduced

# Scatterplot matrix of the percentage (closed) data.
pairs(
  cbind(a2, b2, x2),
  pch = 20,
  col = alpha("black", alpha = 0.25)
)

plot of chunk unnamed-chunk-5

# Pairwise correlations of the percentage data, rounded to two decimals.
round(cor(cbind(a2, b2, x2)), digits = 2)
##       a2    b2    x2
## a2  1.00 -0.71 -0.83
## b2 -0.71  1.00  0.19
## x2 -0.83  0.19  1.00
# Compare distributions on a 2 x 3 grid: raw weights fill the top row,
# their percentage counterparts fill the bottom row.
par(mfrow = c(2, 3))
hist(a)
hist(b)
hist(x)
hist(a2)
hist(b2)
hist(x2)

plot of chunk unnamed-chunk-5

Effect on correlated variables

Simulate data of compound weights with initial correlation

# b and x are built from a plus independent noise, so they correlate with a;
# y is drawn on its own and is unrelated to the other three.
a <- rnorm(1000, mean = 100, sd = 5)
b <- a * 2 + rnorm(1000, mean = 0, sd = 2)
x <- a * 3 + rnorm(1000, mean = 0, sd = 6)
y <- rnorm(1000, mean = 50, sd = 3)

Check relationships

# Scatterplot matrix of the four raw weights.
pairs(
  cbind(a, b, x, y),
  pch = 20,
  col = alpha("black", alpha = 0.25),
  main = "Original"
)

plot of chunk unnamed-chunk-7

# Pairwise correlations of the four raw weights, rounded to two decimals.
round(cor(cbind(a, b, x, y)), digits = 2)
##       a     b     x     y
## a  1.00  0.98  0.93 -0.05
## b  0.98  1.00  0.91 -0.04
## x  0.93  0.91  1.00 -0.04
## y -0.05 -0.04 -0.04  1.00

Make the data Compositional (aka “closed” or “bounded” data)

# Close the data: express each compound as a percentage of the sample total.
total_weight <- a + b + x + y
a2 <- 100 * a / total_weight
b2 <- 100 * b / total_weight
x2 <- 100 * x / total_weight
y2 <- 100 * y / total_weight

The correlational structure is disrupted: the original correlations are modified

# Pairwise correlations of the four percentage variables, to two decimals.
round(cor(cbind(a2, b2, x2, y2)), digits = 2)
##       a2    b2    x2    y2
## a2  1.00  0.68 -0.34 -0.44
## b2  0.68  1.00 -0.49 -0.41
## x2 -0.34 -0.49  1.00 -0.57
## y2 -0.44 -0.41 -0.57  1.00
# Scatterplot matrix of the four percentage variables.
pairs(
  cbind(a2, b2, x2, y2),
  pch = 20,
  col = alpha("black", alpha = 0.25),
  main = "Compositional"
)

plot of chunk unnamed-chunk-9

# Compare distributions of all four components on a 2 x 4 grid: raw weights
# on the top row, percentages on the bottom row. The grid has eight panels,
# so y and y2 are drawn too; without them the rows do not line up.
par(mfrow = c(2, 4))
hist(a)
hist(b)
hist(x)
hist(y)
hist(a2)
hist(b2)
hist(x2)
hist(y2)

plot of chunk unnamed-chunk-9

Transformations for Compositional Data

Additive log-ratio transformation (each component divided by y, the reference component)

# Log-ratio of each component against y2, which serves as the common divisor.
a3 <- log10(a2 / y2)
b3 <- log10(b2 / y2)
x3 <- log10(x2 / y2)
# Pairwise correlations of the log-ratios, rounded to two decimals.
round(cor(cbind(a3, b3, x3)), digits = 2)
##      a3   b3   x3
## a3 1.00 0.99 0.97
## b3 0.99 1.00 0.96
## x3 0.97 0.96 1.00
# Three scatterplot matrices in sequence: raw weights, percentages, log-ratios.
pairs(
  cbind(a, b, x),
  pch = 20,
  col = alpha("black", alpha = 0.25),
  main = "original"
)
pairs(
  cbind(a2, b2, x2),
  pch = 20,
  col = alpha("black", alpha = 0.25),
  main = "compositional"
)
pairs(
  cbind(a3, b3, x3),
  pch = 20,
  col = alpha("black", alpha = 0.25),
  main = "log-ratio"
)

plot of chunk unnamed-chunk-10 plot of chunk unnamed-chunk-10 plot of chunk unnamed-chunk-10

Centred log ratio transformation

# Centred log-ratio (clr): log-transform the composition, then subtract each
# sample's mean log value from every component of that sample.
ori <- cbind(a, b, x, y)
compo <- cbind(a2, b2, x2, y2)
lcompo <- log10(compo)
# rowMeans() is the vectorised equivalent of apply(lcompo, 1, mean).
ml <- rowMeans(lcompo)
# sweep() subtracts ml[i] from every element of row i, making the row-wise
# centring explicit instead of relying on column-major recycling.
clr <- sweep(lcompo, 1, ml, FUN = "-")
# Pairwise correlations of the clr coordinates, rounded to two decimals.
round(cor(clr), digits = 2)
##       a2    b2    x2    y2
## a2  1.00  0.89  0.64 -0.94
## b2  0.89  1.00  0.56 -0.90
## x2  0.64  0.56  1.00 -0.84
## y2 -0.94 -0.90 -0.84  1.00
# Three scatterplot matrices in sequence: raw weights, percentages, clr.
pairs(
  ori,
  pch = 20,
  col = alpha("black", alpha = 0.25),
  main = "original"
)
pairs(
  compo,
  pch = 20,
  col = alpha("black", alpha = 0.25),
  main = "compositional"
)
pairs(
  clr,
  pch = 20,
  col = alpha("black", alpha = 0.25),
  main = "centered log-ratio"
)

plot of chunk unnamed-chunk-12 plot of chunk unnamed-chunk-12 plot of chunk unnamed-chunk-12

Isometric logratio transformation

# Isometric log-ratio coordinates, computed with isomLR() from robCompositions.
# NOTE(review): newer robCompositions releases appear to favour pivotCoord();
# confirm isomLR() is still available in the installed version.
ilrdata <- isomLR(compo)
# Pairwise correlations of the ilr coordinates, rounded to two decimals.
round(cor(ilrdata), digits = 2)
##      [,1] [,2] [,3]
## [1,] 1.00 0.94 0.88
## [2,] 0.94 1.00 0.86
## [3,] 0.88 0.86 1.00
# Three scatterplot matrices in sequence: raw weights, percentages, ilr.
pairs(
  ori,
  pch = 20,
  col = alpha("black", alpha = 0.25),
  main = "original"
)
pairs(
  cbind(a2, b2, x2, y2),
  pch = 20,
  col = alpha("black", alpha = 0.25),
  main = "Compositional"
)
pairs(
  ilrdata,
  pch = 20,
  col = alpha("black", alpha = 0.25),
  main = "isometric log-ratio"
)

plot of chunk unnamed-chunk-14