Test for the Analysis of Statistical Separability of litter samples

# anyadir nombre imagen
# calcular nb objetos identificados por fotointerp.
# calcular nb objetos identificados por classif.


knitr::opts_chunk$set(fig.path = "figure/TestDiscAn_")

Required packages

require(MASS)
require(rgdal)
require(raster)
require(plyr)
require(ggplot2)
require(rgeos)

1. Input data

Define your paths for data directories

dirvect <- "../../MedsealitterShapes/flight1-2vector"
dirrast <- "../../Detección basura flotante/Visual/vol 1 (20-59-100-120m)"

Note: for R, it’s better to define directory and file names that do not include spaces and non-ascii characters ### 1.1 Make an array of file names:

vnoms <- list.files(dirvect, patt="*.shp")
vnoms <- unlist(strsplit(vnoms, ".shp"))
length(vnoms)
## [1] 15
head(vnoms)
## [1] "DSC02511" "DSC02518" "DSC02541" "DSC02560" "DSC02567" "DSC02569"

1.2 Read and set correct CRS & Extent

In this test version we process only 1 file

#for (i in 1:length(vnoms)){
k <-1
for (i in 1:k){
  v <- readOGR(dsn=paste0(dirvect,"/",vnoms[i],".shp"),
               layer=vnoms[i],stringsAsFactors = FALSE)
  b <- brick(file.path(dirrast,paste0(vnoms[i],".JPG")))
  projection(b) <- CRS("+init=epsg:25831")
  extent(b) <- c(0,7952,-5304,0)
  projection(v) <- CRS("+init=epsg:25831")
#  r <- rasterize(v,b[[1]],field="id",background=0)
#  writeRaster(r,file="test", format="GTiff",type="INT1U",overwrite=TRUE)
}
## OGR data source with driver: ESRI Shapefile 
## Source: "../../MedsealitterShapes/flight1-2vector/DSC02511.shp", layer: "DSC02511"
## with 105 features and 4 fields
## Feature type: wkbPolygon with 2 dimensions

2. Make a table with raster (RGB) pixel values for each polygon

(table made with all pixels for each polygon)

2.1 Extract raster (RGB) values for each polygon

#NOTE: next command might take a long time:
vd <- extract(b,v,df=TRUE,cellnumbers=TRUE)

If extract() takes too long, we must find another way

names(vd)[3:5] <- c("R","G","B")

Get x, y coordinates

vd <- data.frame(vd,xyFromCell(b,cell=vd$cell))

2.2 Write polygon characteristics (material, color etc) to table

vd$polyID <- mapvalues(vd[,1],from=1:nrow(v@data),to=v@data$id)
vd$ItemSTD <- mapvalues(vd[,1],from=1:nrow(v@data),to=v@data$ItemSTD)
vd$Material <- mapvalues(vd[,1],from=1:nrow(v@data),to=v@data$Material)
vd$Colour <- mapvalues(vd[,1],from=1:nrow(v@data),to=v@data$Colour)
vd[1:3,]
##   ID     cell   R   G   B      x       y polyID ItemSTD Material Colour
## 1  1 13587069 255 255 253 5052.5 -1708.5      4    Drum  Plastic  White
## 2  1 13587070 255 255 255 5053.5 -1708.5      4    Drum  Plastic  White
## 3  1 13587071 253 255 254 5054.5 -1708.5      4    Drum  Plastic  White

Re-arrange columns:

vd <- vd[,c(1,8:11,6:7,3:5)]
#v@data <- cbind(vd[,1],v@data,vd[,-1]) #caution: we extract pixel values, not averages

3 Select water samples

set.seed(1234) #testing only, remove for final version
water <- sampleRandom(b, size=1000,sp=TRUE,xy=TRUE)
plot(v)
plot(water,add=TRUE,col="red")
#writeOGR(water,dsn="water",layer="water",drive="ESRI Shapefile",overwrite=TRUE)

plot of chunk unnamed-chunk-11

3.1 Eliminate water points within polygons (not actually water!)

3.1.1 Make thicker polygons to approach actual objects

vbuf <- gBuffer(v, width = 15, byid=TRUE) #byid=TRUE forces result to be SpPolDF
#writeOGR(vbuf,dsn="vbuf",layer="vbuf",drive="ESRI Shapefile",overwrite=TRUE)
plot(v)
plot(water,add=TRUE,col="red")
plot(vbuf,col="green",add=TRUE)

plot of chunk unnamed-chunk-12

This approach will work if all (most) objects have a digitized polygon. If this is not the case, we could use the convex hull to make sure no random point lies on an object but we would discard a larger part of water (and, actually, that part that is observed under the same view and sun geometry as the objects).

plot(v)
plot(water,add=TRUE,col="red")
plot(gConvexHull(v),add=TRUE)

plot of chunk unnamed-chunk-13

3.1.2 Find and discard random points on buffered polygons

delme <- over(water,vbuf)
dim(delme) #nrow as nb of points in water2
## [1] 1000    4
table(delme$id) #id of polygs: if point is outside, then NA
## 
## 22 88 
##  1  1
inside <- ifelse(is.na(delme$id),FALSE,TRUE)
table(inside) #5 points inside polygs
## inside
## FALSE  TRUE 
##   998     2
water <- water[!inside,]

#Alternative:
#delme <- gContains(v,water,byid=TRUE, returnDense = TRUE)
#delme DF with nrow as nb of points, ncol as nb. of polygs 
#delme records wether a given point lies within a give polygon
#inside <- apply(delme,1,sum)
#table(inside)
#water <- water[!inside,]

4 Make equivalent table as for rest of polygons and append

4.1. Re-arrange table

water@data[1,]
##       x      y DSC02511.1 DSC02511.2 DSC02511.3
## 1 659.5 -603.5         58        109        113
names(water@data)[3:5] <- c("R","G","B")
water@data$ID <- 501:(500+nrow(water))
water@data$polyID <- 500
water@data$Colour <- water@data$Material <- water@data$ItemSTD <- "water"
water@data[1,]
##       x      y  R   G   B  ID polyID ItemSTD Material Colour
## 1 659.5 -603.5 58 109 113 501    500   water    water  water
water@data <- water@data[,c(6:10,1:5)]

4.2 Append water table to that of the rest of materials

water@data[1,]
##    ID polyID ItemSTD Material Colour     x      y  R   G   B
## 1 501    500   water    water  water 659.5 -603.5 58 109 113
vd[1,]
##   ID polyID ItemSTD Material Colour      x       y   R   G   B
## 1  1      4    Drum  Plastic  White 5052.5 -1708.5 255 255 253
vdw <- rbind(vd,water@data)

5. LDA by Material

5.1 Classification and confusion table

vdw.ldaCV <- lda(vdw[,c("R","G","B")],group=vdw$Material,CV=TRUE)
vdw.conf <- table(vdw$Material,vdw.ldaCV$class)
acc <- sum(diag(vdw.conf))/sum(vdw.conf)
sum.truth <- apply(vdw.conf,1,sum)
sum.clas <- apply(vdw.conf,2,sum)
acc.prod <- diag(vdw.conf)/sum.truth
acc.user <- diag(vdw.conf)/sum.clas
w <- 80
options(width=120) #we mke line wider for the table
vdw.conf
##              
##               Aluminium  Cork Glass Keratin Plastic Polystyrene Rubber Tetrapack Tissue water  Wood
##   Aluminium           0     1     0       0     380         116      0         0      0     0     0
##   Cork                0  2941     0       0       0          43      0         0      0     0     0
##   Glass               0     1     0       0      35         181      1         0      0    38     3
##   Keratin             0     0     0       0      92        1003      0         0      0     0     0
##   Plastic             0     3     0       0    5391        1674    311         0      0   249     0
##   Polystyrene         0    78     0       0    2488       16684      0         0      0    14    15
##   Rubber              0   674     0       0     436           0    123         0      0     0     0
##   Tetrapack           1     0     0       0     514          21      8         0      0     0     0
##   Tissue              0     0     0       0    2047           0      0         0      0     1     0
##   water               0     0     0       0       2          75      0         0      0   921     0
##   Wood                0    58     0       0       1         150      0         0      0     0   460
options(width=w) #restore default width

Overall Accuracy:

acc
## [1] 0.7122522

Producer’s and User’s Accuracies:

round(cbind(acc.prod, acc.user),3)
##             acc.prod acc.user
## Aluminium      0.000    0.000
## Cork           0.986    0.783
## Glass          0.000      NaN
## Keratin        0.000      NaN
## Plastic        0.707    0.473
## Polystyrene    0.865    0.836
## Rubber         0.100    0.278
## Tetrapack      0.000      NaN
## Tissue         0.000      NaN
## water          0.923    0.753
## Wood           0.688    0.962

5.2 Represent in LD (aka canonical) space

5.2.1 Calculate and apply transform

vdw.lda <- lda(vdw[,c("R","G","B")],group=vdw$Material)
vdw.ld <- data.matrix(vdw[,c("R","G","B")])%*%vdw.lda$scaling
vdw.ld <- data.frame(Material=vdw$Material,vdw.ld)
vdw.ld[1:3,]
##   Material       LD1      LD2        LD3
## 1  Plastic -6.791683 4.718761 -0.5216837
## 2  Plastic -6.934863 4.633335 -0.5049483
## 3  Plastic -6.792020 4.704696 -0.6018479

5.2.2 Display

#colors to be improved!
#colorines <- c("black","purple","red","blue","cyan","green","orange","lawngreen","seagreen","navy","grey", "orange3", "lightsalmon2", "chocolate4", "coral", "indianred4", "gold","darkred", "brown","forestgreen","dodgerblue2" )
colorines <- c("grey","brown","green","black","cyan","yellow","orange","pink","seagreen","blue","coral")
ggd1 <- ggplot(data=vdw.ld) +
  geom_point(aes(x=LD1,y=LD2,color=Material),size=4) +
  scale_color_manual(values=colorines)
print(ggd1)
#ggsave("vdwLD1LD2.bmp")
ggd2 <- ggplot(data=vdw.ld) +
  geom_point(aes(x=LD2,y=LD3,color=Material),size=4) +
  scale_color_manual(values=colorines)
print(ggd2)
#ggsave("vdwLD2LD3.bmp")

#pairs(vdw.ld[,2:4],col=vdw.ld$Material,cex=0.5,pch=20)
#pairs(vdw.ld[,2:4],col=vdw.ld$Material2,cex=0.5,pch=20)

plot of chunk unnamed-chunk-21 plot of chunk unnamed-chunk-21

5.3 Simplified view: Water and Debries

vdw$Material2 <- vdw$Material
vdw$Material2[vdw$Material!="water"] <- "Debries"
table(vdw$Material2)
## 
## Debries   water 
##   36236     998
vdw.ld$Material2 <- vdw$Material2
vdw.ldaCV$classb <- as.character(vdw.ldaCV$class)
vdw.ldaCV$classb[vdw.ldaCV$classb!="water"] <- "Debries"
options(width=120)
table(vdw$Material2,vdw.ldaCV$classb)
##          
##           Debries water
##   Debries   35934   302
##   water        77   921
options(width=w) 
vdw.confb <- table(vdw$Material2)
accb <- sum(diag(vdw.confb))/sum(vdw.confb)
sum.truthb <- apply(vdw.confb,1,sum)
sum.clasb <- apply(vdw.confb,2,sum)
## Error in if (d2 == 0L) {: missing value where TRUE/FALSE needed
acc.prodb <- diag(vdw.confb)/sum.truthb
acc.userb <- diag(vdw.confb)/sum.clasb
vdw.confb
## 
## Debries   water 
##   36236     998

Overall Accuracy:

accb
## [1] 1

Producer’s and User’s Accuracies

round(cbind(acc.prodb, acc.userb),3)
##      [,1] [,2]  [,3]  [,4]
## [1,]    1    0 1.006 0.000
## [2,]    0    1 0.000 0.816
ggplot(data=vdw.ld) +
  geom_point(aes(x=LD1,y=LD2,color=Material2),size=4)
ggplot(data=vdw.ld) +
  geom_point(aes(x=LD2,y=LD3,color=Material2),size=4)

plot of chunk unnamed-chunk-24 plot of chunk unnamed-chunk-24

6. LDA by Material (water and Debries)

Note: this is NOT the same as the Simplified Vision above

vdw.lda2CV <- lda(vdw[,c("R","G","B")],group=vdw$Material2,CV=TRUE)
vdw.conf2 <- table(vdw$Material2,vdw.lda2CV$class)
acc2 <- sum(diag(vdw.conf2))/sum(vdw.conf2)
sum.truth2 <- apply(vdw.conf2,1,sum)
sum.clas2 <- apply(vdw.conf2,2,sum)
acc.prod2 <- diag(vdw.conf2)/sum.truth2
acc.user2 <- diag(vdw.conf2)/sum.clas2
vdw.conf2
##          
##           Debries water
##   Debries   35954   282
##   water        80   918

Overall Accuracy:

acc2
## [1] 0.9902777

Producer’s and User’s Accuracies

round(cbind(acc.prod2, acc.user2),3)
##         acc.prod2 acc.user2
## Debries     0.992     0.998
## water       0.920     0.765

7. LDA by Colour

vdw.ldaColCV <- lda(vdw[,c("R","G","B")],group=vdw$Colour,CV=TRUE)
vdw.confCol <- table(vdw$Colour,vdw.ldaColCV$class)
accCol <- sum(diag(vdw.confCol))/sum(vdw.confCol)
sum.truthCol <- apply(vdw.confCol,1,sum)
sum.clasCol <- apply(vdw.confCol,2,sum)
acc.prodCol <- diag(vdw.confCol)/sum.truthCol
acc.userCol <- diag(vdw.confCol)/sum.clasCol

options(width=140) #we mke line wider for the table
vdw.confCol
##                 
##                  Black Brown Green Mixed   Red Transparent water White White-blue White-green White-red-blue
##   Black          11792     1     2     0     0          11    38   808          0           0              0
##   Brown            790  9887   131     0    13           1     0   295          0           0              0
##   Green            148   271   562     0     0         132    94   731          0           0              0
##   Mixed             28    47     7     0    11           0     0   219          0           0              0
##   Red               39    18     0     0   314           0     0   160          0           0              0
##   Transparent      375     0     0     0     0         737   147   590          0           0              0
##   water             72     0     0     0     0           1   923     2          0           0              0
##   White             47     0     0     0     0         209     1  6899          0           0              0
##   White-blue        42     0     0     0     0           6     0   223          0           0              0
##   White-green        0     0     5     0     0          15     0   253          0           0              0
##   White-red-blue     0     0     0     0     0           0     0   137          0           0              0
options(width=w) #restore default width

Overall Accuracy:

accCol
## [1] 0.8356341

Producer’s and User’s Accuracies

round(cbind(acc.prodCol, acc.userCol),3)
##                acc.prodCol acc.userCol
## Black                0.932       0.884
## Brown                0.889       0.967
## Green                0.290       0.795
## Mixed                0.000         NaN
## Red                  0.591       0.929
## Transparent          0.399       0.663
## water                0.925       0.767
## White                0.964       0.669
## White-blue           0.000         NaN
## White-green          0.000         NaN
## White-red-blue       0.000         NaN
colorinesCol <- c("black","brown","green","yellow","red","grey100","blue","grey","cyan","lightgreen","pink")

vdw.ldaCol <- lda(vdw[,c("R","G","B")],group=vdw$Colour)
vdw.ldCol <- data.matrix(vdw[,c("R","G","B")])%*%vdw.ldaCol$scaling
vdw.ldCol <- data.frame(Colour=vdw$Colour,vdw.ldCol)

ggd3 <- ggplot(data=vdw.ldCol) +
  geom_point(aes(x=LD1,y=LD2,color=Colour),size=3)  +
  scale_color_manual(values=colorinesCol)
print(ggd3)

plot of chunk unnamed-chunk-30

ggd4 <- ggplot(data=vdw.ldCol) +
  geom_point(aes(x=LD2,y=LD3,color=Colour),size=4)  +
  scale_color_manual(values=colorinesCol)
print(ggd4)

plot of chunk unnamed-chunk-30

rm(ggd1, ggd2,ggd3,ggd4)
#Just to make sure color legend is correct (paranoid!)
# ggplot(data=vdw.ldCol) +
#   geom_text(aes(x=LD1,y=LD2,color=Colour,label=Colour),size=2)  +
#   scale_color_manual(values=colorinesCol)

#pairscols <- mapvalues(vdw.ldCol$Colour, from=sort(unique(as.character(vdw.ldCol$Colour))), to=colorinesCol)
#pairs(vdw.ldCol[,-1],col=pairscols,cex=1,pch=20) #colors are not correct, do not understand why!

8. Save as vector Shape file

vdw$classMat <- vdw.ldaCV$class
vdw$classb <- vdw.ldaCV$classb
vdw$class2 <- vdw.lda2CV$class
vdw$classCOl <- vdw.ldaColCV$class
vdw[1,]
##   ID polyID ItemSTD Material Colour      x       y   R   G   B Material2
## 1  1      4    Drum  Plastic  White 5052.5 -1708.5 255 255 253   Debries
##   classMat  classb  class2 classCOl
## 1  Plastic Debries Debries    White
vdwSPDF <- vdw
coordinates(vdwSPDF) <- cbind(vdwSPDF$x,vdwSPDF$y) #convert to SpPointsDF
projection(vdwSPDF) <- projection(v)
writeOGR(vdwSPDF, dsn=paste0(vnoms[i],"vclass"),layer=paste0(vnoms[i],"vclass"),
         driver="ESRI Shapefile", overwrite=TRUE)

9. LD images

We use the LDA by color as results were better

names(b) <- c("R","G","B") #band names must match names used in lda() for predict() to work
#next commabd takes too long a time...
#bld <- predict(b,vdw.ldaCol)
#writeRaster(bld,filename=vnoms[i],format="GTiff",overwrite=TRUE)
#bld <- calc(b,function(x)x%*%vdw.ldaCol$scaling)