###
# code für woche 8 - clustering und dimensionsreduktion
# 250603
###

if (!require(pacman)) install.packages("pacman"); library(pacman)
p_load(cluster)
p_load(ggplot2)
p_load(data.table)

# Load data ----
data(mtcars)
# Keep the car names first: setDT() below drops the row names.
car_names <- row.names(mtcars)
# Convert mtcars to a data.table by reference (no copy).
setDT(mtcars)
# mtcars[, names := car_names]
head(mtcars, n = 6L)

# PCA ----

## Scaling
# Manual standardisation of single columns, shown for illustration; this is
# what scale() does for every column at once.
# scale(mtcars)
mtcars[, mpg_norm1 := (mpg - mean(mpg))]            # centred only
mtcars[, mpg_norm2 := (mpg - mean(mpg)) / sd(mpg)]  # centred and scaled

mtcars[, hp_norm := (hp - mean(hp)) / sd(hp)]

# Raw values of mpg and hp overlaid on one x axis.
ggplot(mtcars) +
  geom_histogram(aes(x = mpg), bins = 20) +
  geom_histogram(aes(x = hp), bins = 20, fill = "red")

# Standardised values of the same two variables overlaid on one x axis.
ggplot(mtcars) +
  geom_histogram(aes(x = mpg_norm2), bins = 20) +
  geom_histogram(aes(x = hp_norm), bins = 20, fill = "red")

# Standardise only the original dataset variables. The helper columns added
# above (mpg_norm1, mpg_norm2, hp_norm) become exact duplicates of mpg / hp
# once scaled and would over-weight those two variables in the PCA.
# names(datasets::mtcars) always yields the pristine column set, regardless of
# which columns have been added to the data.table by reference.
pca_vars <- names(datasets::mtcars)
mtcars_scaled <- scale(mtcars[, .SD, .SDcols = pca_vars])

## Principal component analysis ----
# Keep the fit so loadings and scores can be inspected later; the bare name
# on the next line still prints it when the script is run interactively.
pca_result <- prcomp(mtcars_scaled)
pca_result


# K-means clustering ----
# Cluster on the standardised original variables only. Using the raw table
# would (a) feed the derived helper columns (mpg_norm1, mpg_norm2, hp_norm)
# in as extra features and (b) let variables with large numeric ranges
# dominate the Euclidean distances.
cluster_vars <- names(datasets::mtcars)
cluster_input <- scale(mtcars[, .SD, .SDcols = cluster_vars])

# k-means starts from randomly chosen centres: fix the seed so the result is
# reproducible, and use several random starts to avoid a poor local optimum.
set.seed(42)
cluster_result <- kmeans(cluster_input, centers = 4, nstart = 25)

mtcars[, cluster := cluster_result$cluster]
mtcars[, name := car_names]

# Plot
# The cluster id is a nominal label, so map it as a factor onto a discrete
# palette; a binned continuous scale can put adjacent ids into the same bin.
ggplot(mtcars) +
  geom_point(aes(x = hp, y = wt, color = factor(cluster))) +
  scale_color_viridis_d(name = "cluster")

