library(cluster)
library(gstat)
sp.theme(TRUE)
data(meuse)
#choose variables which can be compared: heavy metal concentrations
heavy.m = meuse[c("lead", "copper", "zinc", "cadmium")]
##########################
#agglomerative clustering#
##########################
#agglomerative nested clustering of standardized values
#using four different methods for point-cluster distance
hm.average = agnes(heavy.m, stand = TRUE, method = "average")
hm.complete = agnes(heavy.m, stand = TRUE, method = "complete")
hm.single = agnes(heavy.m, stand = TRUE, method = "single")
hm.ward = agnes(heavy.m, stand = TRUE, method = "ward")
#plot dendrogram (second plot)
plot(hm.average)
#plot all dendrograms
par(mfrow = c(2, 2)) #puts together clusters with
pltree(agnes(heavy.m, stand = TRUE, method = "average"), main = "average linkage") #similar average
pltree(agnes(heavy.m, stand = TRUE, method = "complete"), main = "complete linkage") #similar furthest neighbor
pltree(agnes(heavy.m, stand = TRUE, method = "single"), main = "single linkage") #similar nearest neighbor
pltree(agnes(heavy.m, stand = TRUE, method = "ward"), main = "ward's method") #?
par(mfrow = c(1,1))
#function that turns dendrogram into cluster numbers (x dendrogram, n number of cluster or h height), note that gr gives cluster numbers for points ordered as in the dendrogram which has to be permutated to have the right order in group
grouping = function(x, n, h){
if (missing(h))
h = sort(x$height, decreasing = TRUE)[n]
gr = numeric(length(x$order))
i = 1
for (j in 1: length(x$height)){
gr[j] = i
if (x$height[j]> h){i = i + 1}
}
gr[length(x$order)] = i
group = numeric(length(x$order))
o = order(x$order)
for (k in 1:length(x$order)){
group[k] = gr[o[k]]
}
return(group)
}
#add columns with cluster numbers to meuse
meuse$groupaverage4 = grouping(hm.average,4)
meuse$groupcomplete4 = grouping(hm.complete,4)
meuse$groupsingle4 = grouping(hm.single,4)
meuse$groupward4 = grouping(hm.ward,4)
#plot feature space (lead and cadmium;could be zinc and copper, too)
par(mfrow = c(2, 2))
plot(meuse$lead, meuse$cadmium, col = meuse$groupaverage4, main = "average linkage")
plot(meuse$lead, meuse$cadmium, col = meuse$groupcomplete4, main = "complete linkage")
plot(meuse$lead, meuse$cadmium, col = meuse$groupsingle4, main = "single linkage")
plot(meuse$lead, meuse$cadmium, col = meuse$groupward4, main = "wards method")
par(mfrow = c(1,1))
#plot maps
coordinates(meuse) = c("x", "y")
spplot(meuse, c("groupaverage4","groupcomplete4","groupsingle4","groupward4"), cuts = 4)
##################################################
#agglomerative clustering without standardization
hm.average = agnes(heavy.m, stand = FALSE, method = "average")
hm.complete = agnes(heavy.m, stand = FALSE, method = "complete")
hm.single = agnes(heavy.m, stand = FALSE, method = "single")
hm.ward = agnes(heavy.m, stand = FALSE, method = "ward")
#dendrograms, feature space plots, maps can be derived with same commands
#####################
#divisive clustering#
#####################
hm.div = diana(heavy.m, diss = F, metric = "euclidean", stand = T)
meuse$groupdiv4 = grouping(hm.div,4)
plot(hm.div)
plot(meuse$lead, meuse$cadmium, col = meuse$groupdiv4, main = "divisive")
spplot(meuse, "groupdiv4", cuts = 4)
######################
#iterative clustering#
######################
hm.pam4 = pam(heavy.m, 4, diss = F, stand = T, metric = "euclidean", medoids = NULL, stand = T, do.swap = TRUE)
plot(hm.pam4)
meuse$grouppam4 = hm.pam4$clustering
plot(meuse$lead, meuse$cadmium, col = meuse$grouppam4, main = "iterative")
spplot(meuse, "grouppam4", cuts = 4)