Hierarchical Clustering of random sample of segments (without repetitions)

knitr::opts_chunk$set(warning=FALSE, message=FALSE, fig.height=6, fig.width=8,
                      fig.path = "figure/hclust_")

require(rgdal)
require(raster)
require(fastcluster)
source("/media/alobo/LaCie2T1/Rutils/colLab.R")
## Warning in file(filename, "r", encoding = encoding): cannot open file '/
## media/alobo/LaCie2T1/Rutils/colLab.R': No such file or directory
## Error in file(filename, "r", encoding = encoding): cannot open the connection
#source("/Volumes/LaCie2T1/Rutils/colLab.R")
#require(dendextend)
require(plyr)
require(scales)

dirvec <- "/media/alobo/LaCie2T1/FCConesa/Sahara2/SaharaOTBseg/sub1"
dirdata <- "/media/alobo/LaCie2T1/FCConesa/Sahara2/sahara_datatest"
# dirvec <- "/Volumes/LaCie2T1/FCConesa/Sahara2/SaharaOTBseg/sub1"
# dirdata <- "/Volumes/LaCie2T1/FCConesa/Sahara2/sahara_datatest"

1. Read Data

segMSPCA <- readOGR(dsn=dirvec, layer="WV3sub1PCAMSS_r100_s5_R5_S5_m0_m20",
                 stringsAsFactors = FALSE)
names(segMSPCA@data)
names(segMSPCA@data)[3:5] <- paste0("meanPC",1:3)
names(segMSPCA@data)[6:8] <- paste0("varPC",1:3)
#we get rid of the left vertical bar with 0 values
segMSPCA@data[segMSPCA@data$label==1,]
plot(segMSPCA[segMSPCA@data$label==1,])
#plot(segMSPCA[segMSPCA@data$label!=1,]) #very sloow!!!
segMSPCA <- segMSPCA[segMSPCA@data$label!=1,]

#save(segMSPCA,file="segMSPCA.rda")

2. Random sample

set.seed(1000)
rind <- sample(1:nrow(segMSPCA@data),5000)
nsamples <- round(50000*prob)
segMSPCArnd <- segMSPCA[nsamples>0,]
segMSPCArnd <- segMSPCA[rind,]
dim(segMSPCArnd)
## [1] 5000    8
plot(segMSPCArnd@data[,3:4],type="p",pch=20,col=alpha("black",0.10))

plot of chunk unnamed-chunk-3

plot(segMSPCArnd)
save(segMSPCArnd,file="segMSPCArnd.rda")
#load("segMSPCArnd.rda")

3. Clustering

#make sure require(fastcluster): really much faster!
segMSPCArndhc <- hclust(dist(segMSPCArnd@data[,3:5]),method="ward.D2")
plot(2:(length(segMSPCArndhc$height)+1),rev(segMSPCArndhc$height))

plot of chunk unnamed-chunk-5

plot(2:11,rev(segMSPCArndhc$height)[1:10])

plot of chunk unnamed-chunk-5

hclass8 <- cutree(segMSPCArndhc,k=8)
colorCodes <- c("red","green","blue","cyan","yellow","pink","orange","black")
plot(segMSPCArndhc,hang=-1,labels=FALSE,sub="",xlab="",asp=1)
rect.hclust(segMSPCArndhc, k=8,cluster = hclass8,border =colorCodes )
legend("topright",legend=paste0("Cluster_",1:8),cex=0.75,lty=1,
       col=colorCodes)

plot of chunk unnamed-chunk-5

4. Plot in PCA plane

colorines <- mapvalues(hclass8,from=unique(hclass8),to=colorCodes)
plot(segMSPCArnd@data[,3:4],type="n",xlab="PC1",ylab="PC2")
text(segMSPCArnd@data[,3:4],label=hclass1,cex=0.75,col=alpha(colorines,0.5))

plot of chunk unnamed-chunk-6

5. Save results

segMSPCArnd@data$hclass8 <- hclass8
writeOGR(segMSPCArnd,dsn="segMSPCArnd",layer="segMSPCArnd",
         driver="ESRI Shapefile", overwrite=TRUE)
#save(segMSPCArnd,file="segMSPCArnd.rda")