Librerías aplicadas

library(caret)
Loading required package: ggplot2
Loading required package: lattice
library(raster)

Carga de las bandas de imágenes satelitales.

# Scene directory and Landsat 8 scene id, factored out so the long path is
# written only once (the original repeated it nine times).
scene_dir <- "/Users/elianaalizchuquillanquijulcapari/Documents/workspace-r/demos/capturas-landsat/roma/LC81910312013208LGN00"
scene_id  <- "LC81910312013208LGN00"

# Full path of one band's GeoTIFF given its band number.
band_path <- function(b) file.path(scene_dir, sprintf("%s_B%d.TIF", scene_id, b))

band1  <- raster(band_path(1))
band2  <- raster(band_path(2))
band3  <- raster(band_path(3))
band4  <- raster(band_path(4))
band5  <- raster(band_path(5))
band6  <- raster(band_path(6))
band7  <- raster(band_path(7))
band10 <- raster(band_path(10))
band11 <- raster(band_path(11))

# Stack the nine bands into one multi-layer brick and name each layer.
roma_rst_lan <- brick(band1,band2,band3,band4,band5,band6,band7,band10,band11)
names(roma_rst_lan) <- c(paste0("Banda", 1:7), "Banda10", "Banda11")

Procesado de datos

#Carga de muestras de entrenamiento
rstTrainroma <- raster("/Users/elianaalizchuquillanquijulcapari/Documents/workspace-r/demos/DataTraining/rome_lcz_GT.tif")

# Remover ceros (fondo NA)
rstTrainroma[rstTrainroma==0] <- NA

# Convertir a tipo discreto
rstTrainroma <- ratify(rstTrainroma)

# Cambiamos el nombre
names(rstTrainroma) <- "layer"

#Muestras de valores
table(values(rstTrainroma))

   2    3    5    6    8   10   11   12   14   17 
1551  104 1495  480  435   51  284  555  984  500 
# Eliminación de NAs 
rstDFlanroma.df <- na.omit(values(stack(rstTrainroma, roma_rst_lan)))

# Visualizamos las muestras
cols <- c("#fde725","#b5de2b", "#6ece58","#35b779", "#1f9e89","#26828e", "#31688e", "#3e4989", "#482878", "#440154")
plot(rstTrainroma, main="Muestras de entrenamiento de Roma", col = cols)

NA
NA

Fase de Clasificación

# First execution only: persist the extracted samples to CSV; on later runs
# the data frame is read back from disk instead.
#write.csv(rstDFlanroma.df,"rstDFRoma.csv", row.names = FALSE)

# Read the samples (column `layer` = class label, remaining columns = bands).
samplesLanroma <- read.csv("rstDFRoma.csv")

# Stratified 70/30 split: 70% of each class for training, the rest for
# evaluation. NOTE(review): call set.seed() beforehand for a reproducible split.
trainx <- vector("list", 17)
evalx  <- vector("list", 17)
for (i in 1:17) { # iterate over every possible category id
  cls  <- samplesLanroma[samplesLanroma$layer == i, ]
  smpl <- floor(0.70 * nrow(cls))
  tt   <- sample(seq_len(nrow(cls)), size = smpl)
  trainx[[i]] <- cls[tt, ]
  # Fix: when `tt` is empty, the original `cls[-tt, ]` selected ZERO rows
  # (x[-integer(0), ] is x[integer(0), ]), silently dropping every sample of
  # a class too small to split. setdiff() keeps all unsampled rows correctly.
  evalx[[i]] <- cls[setdiff(seq_len(nrow(cls)), tt), ]
}

# Bind the per-class pieces into the training and evaluation data frames.
trn <- do.call(rbind, trainx)
eva <- do.call(rbind, evalx)

Algoritmo Random Forest

# Training control: 10-fold cross-validation, repeated 5 times.
fitControl <- trainControl(method = "repeatedcv", # repeated cross-validation of the training data
                   number = 10, # number of folds
                   repeats = 5) # number of complete CV repetitions


# Model 1: random forest; caret evaluates 8 candidate mtry values (tuneLength).
# Predictors = band columns 2..ncol(trn); response = the class label, coerced
# to factor so caret treats this as classification. Bands are centred/scaled.
rf_model_roma<- caret::train(x = trn[,(2:ncol(trn))], y = as.factor(trn$layer),
                        method = "rf", 
                        metric="Accuracy", 
                        trControl = fitControl,
                        preProcess = c("center", "scale"),
                        tuneLength  = 8)

# Model 2: identical setup, but with an explicit mtry grid of 1..20.
rf_grid <- expand.grid(mtry=1:20)

rf_model_roma2<- caret::train(x = trn[,(2:ncol(trn))], y = as.factor(trn$layer),
                        method = "rf", 
                        metric="Accuracy", 
                        trControl = fitControl,
                        preProcess = c("center", "scale"),
                        tuneGrid = rf_grid)

Exploración del modelo

#Información modelo rf
print(rf_model_roma)
Random Forest 

4502 samples
   9 predictor
  10 classes: '2', '3', '5', '6', '8', '10', '11', '12', '14', '17' 

Pre-processing: centered (9), scaled (9) 
Resampling: Cross-Validated (10 fold, repeated 5 times) 
Summary of sample sizes: 4050, 4054, 4051, 4053, 4053, 4050, ... 
Resampling results across tuning parameters:

  mtry  Accuracy   Kappa    
  2     0.7745059  0.7295556
  3     0.7741051  0.7291448
  4     0.7738406  0.7288910
  5     0.7754840  0.7309457
  6     0.7734405  0.7284506
  7     0.7733511  0.7283656
  8     0.7726836  0.7276080
  9     0.7705513  0.7250804

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was mtry = 5.
print(rf_model_roma$finalModel)

Call:
 randomForest(x = x, y = y, mtry = min(param$mtry, ncol(x))) 
               Type of random forest: classification
                     Number of trees: 500
No. of variables tried at each split: 5

        OOB estimate of  error rate: 22.48%
Confusion matrix:
     2  3   5   6   8 10  11  12  14  17 class.error
2  847 12 199  10  14  1   0   0   2   0  0.21935484
3   32 20  15   4   1  0   0   0   0   0  0.72222222
5  206  4 713  54  25  4   1  16  23   0  0.31835564
6   11  2  63 198   0  1   0  42  19   0  0.41071429
8   14  0  38   1 247  0   0   0   4   0  0.18750000
10  11  0   9   3   0 12   0   0   0   0  0.65714286
11   0  0   1   0   0  0 191   5   1   0  0.03535354
12   1  0  35  19   0  1   5 292  35   0  0.24742268
14   0  0  23  15   2  0   0  28 620   0  0.09883721
17   0  0   0   0   0  0   0   0   0 350  0.00000000
#Gráfica del modelo rf
plot(rf_model_roma)


print(rf_model_roma2) #modelo 2
Random Forest 

4502 samples
   9 predictor
  10 classes: '2', '3', '5', '6', '8', '10', '11', '12', '14', '17' 

Pre-processing: centered (9), scaled (9) 
Resampling: Cross-Validated (10 fold, repeated 5 times) 
Summary of sample sizes: 4051, 4049, 4051, 4050, 4052, 4053, ... 
Resampling results across tuning parameters:

  mtry  Accuracy   Kappa    
   1    0.7728575  0.7273050
   2    0.7760999  0.7314324
   3    0.7777427  0.7335153
   4    0.7776546  0.7334634
   5    0.7773410  0.7330797
   6    0.7759660  0.7314695
   7    0.7761865  0.7317788
   8    0.7751712  0.7305626
   9    0.7745039  0.7297882
  10    0.7738802  0.7290222
  11    0.7744128  0.7296807
  12    0.7737875  0.7289090
  13    0.7753915  0.7308690
  14    0.7738345  0.7289740
  15    0.7733431  0.7283518
  16    0.7742819  0.7295094
  17    0.7740562  0.7292300
  18    0.7747702  0.7300851
  19    0.7739248  0.7290817
  20    0.7740556  0.7292078

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was mtry = 3.
print(rf_model_roma2$finalModel)

Call:
 randomForest(x = x, y = y, mtry = min(param$mtry, ncol(x))) 
               Type of random forest: classification
                     Number of trees: 500
No. of variables tried at each split: 3

        OOB estimate of  error rate: 22.12%
Confusion matrix:
     2  3   5   6   8 10  11  12  14  17 class.error
2  862 12 183  10  15  1   0   0   2   0  0.20552995
3   38 17  13   4   0  0   0   0   0   0  0.76388889
5  207  3 717  51  23  4   1  16  24   0  0.31453155
6   12  2  56 197   0  1   0  43  25   0  0.41369048
8   12  0  39   1 250  0   0   0   2   0  0.17763158
10  11  0  11   3   0 10   0   0   0   0  0.71428571
11   0  0   1   0   0  0 190   6   1   0  0.04040404
12   1  0  34  17   0  1   4 295  36   0  0.23969072
14   0  0  23  15   1  0   0  31 618   0  0.10174419
17   0  0   0   0   0  0   0   0   0 350  0.00000000
plot(rf_model_roma2) #modelo 2

Evaluación del modelo 1.

pred_rf_model_roma<- predict(rf_model_roma,newdata = eva[,-1])


cm <- confusionMatrix(data = pred_rf_model_roma, as.factor(eva$layer))
cm
Confusion Matrix and Statistics

          Reference
Prediction   2   3   5   6   8  10  11  12  14  17
        2  374  14  77   3   7   2   0   0   1   0
        3    1  14   1   3   0   0   0   0   0   0
        5   85   3 320  19  18  10   1  14   3   0
        6    2   1  23 101   1   1   0   9   6   0
        8    3   0   8   0 105   0   0   0   0   0
        10   0   0   0   0   0   2   0   0   0   0
        11   0   0   0   0   0   0  80   2   0   0
        12   0   0  14  11   0   0   3 122  13   0
        14   1   0   6   7   0   1   2  20 273   0
        17   0   0   0   0   0   0   0   0   0 150

Overall Statistics
                                          
               Accuracy : 0.7956          
                 95% CI : (0.7769, 0.8133)
    No Information Rate : 0.2406          
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.7551          
                                          
 Mcnemar's Test P-Value : NA              

Statistics by Class:

                     Class: 2 Class: 3 Class: 5 Class: 6 Class: 8 Class: 10
Sensitivity            0.8026 0.437500   0.7127  0.70139  0.80153  0.125000
Specificity            0.9293 0.997375   0.8972  0.97602  0.99391  1.000000
Pos Pred Value         0.7824 0.736842   0.6765  0.70139  0.90517  1.000000
Neg Pred Value         0.9369 0.990615   0.9119  0.97602  0.98572  0.992765
Prevalence             0.2406 0.016520   0.2318  0.07434  0.06763  0.008260
Detection Rate         0.1931 0.007228   0.1652  0.05214  0.05421  0.001033
Detection Prevalence   0.2468 0.009809   0.2442  0.07434  0.05989  0.001033
Balanced Accuracy      0.8659 0.717438   0.8049  0.83870  0.89772  0.562500
                     Class: 11 Class: 12 Class: 14 Class: 17
Sensitivity            0.93023   0.73054    0.9223   1.00000
Specificity            0.99892   0.97684    0.9775   1.00000
Pos Pred Value         0.97561   0.74847    0.8806   1.00000
Neg Pred Value         0.99677   0.97463    0.9859   1.00000
Prevalence             0.04440   0.08622    0.1528   0.07744
Detection Rate         0.04130   0.06298    0.1409   0.07744
Detection Prevalence   0.04233   0.08415    0.1600   0.07744
Balanced Accuracy      0.96458   0.85369    0.9499   1.00000

Representación de Matriz de confusión

# Confusion-matrix counts in long format (Prediction, Reference, Freq).
cm_d <- as.data.frame(cm$table)

# Overall statistics (Accuracy, Kappa, ...), rounded to 2 decimals.
cm_st <- data.frame(cm$overall)
cm_st$cm.overall <- round(cm_st$cm.overall, 2)

# Per-cell percentage of the total, attached next to the raw counts.
cm_p <- as.data.frame(prop.table(cm$table))
cm_d$Perc <- round(cm_p$Freq * 100, 2)

# Draw the matrix as a heatmap. Fix: `Perc` and `cm_st` were computed but
# never displayed; the cell label now shows count + percentage and the
# subtitle reports the overall accuracy.
ggplot(data = cm_d, aes(x = Prediction, y = Reference, fill = Freq)) +
  geom_tile(color = "white", lwd = 0.5) +
  geom_text(aes(label = paste0(Freq, "\n", Perc, "%")), color = 'white', size = 3.5) +
  theme_minimal() +
  ggtitle("Matriz de Confusión",
          subtitle = paste0("Accuracy: ", cm_st["Accuracy", "cm.overall"]))

Evaluación del modelo 2

pred_rf_model_roma2<- predict(rf_model_roma2,newdata = eva[,-1])


cm <- confusionMatrix(data = pred_rf_model_roma2, as.factor(eva$layer))
cm
Confusion Matrix and Statistics

          Reference
Prediction   2   3   5   6   8  10  11  12  14  17
        2  375  17  78   4   6   2   0   0   2   0
        3    0  11   0   1   0   0   0   0   0   0
        5   85   3 320  21  20  10   1  14   2   0
        6    2   1  24 101   1   1   0  10   4   0
        8    3   0   8   0 104   0   0   0   0   0
        10   0   0   0   0   0   2   0   0   0   0
        11   0   0   0   0   0   0  80   2   0   0
        12   0   0  13  12   0   0   3 120  13   0
        14   1   0   6   5   0   1   2  21 275   0
        17   0   0   0   0   0   0   0   0   0 150

Overall Statistics
                                          
               Accuracy : 0.794           
                 95% CI : (0.7753, 0.8118)
    No Information Rate : 0.2406          
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.7529          
                                          
 Mcnemar's Test P-Value : NA              

Statistics by Class:

                     Class: 2 Class: 3 Class: 5 Class: 6 Class: 8 Class: 10
Sensitivity            0.8047 0.343750   0.7127  0.70139  0.79389  0.125000
Specificity            0.9259 0.999475   0.8952  0.97602  0.99391  1.000000
Pos Pred Value         0.7748 0.916667   0.6723  0.70139  0.90435  1.000000
Neg Pred Value         0.9374 0.989091   0.9117  0.97602  0.98518  0.992765
Prevalence             0.2406 0.016520   0.2318  0.07434  0.06763  0.008260
Detection Rate         0.1936 0.005679   0.1652  0.05214  0.05369  0.001033
Detection Prevalence   0.2499 0.006195   0.2457  0.07434  0.05937  0.001033
Balanced Accuracy      0.8653 0.671613   0.8039  0.83870  0.89390  0.562500
                     Class: 11 Class: 12 Class: 14 Class: 17
Sensitivity            0.93023   0.71856    0.9291   1.00000
Specificity            0.99892   0.97684    0.9781   1.00000
Pos Pred Value         0.97561   0.74534    0.8842   1.00000
Neg Pred Value         0.99677   0.97354    0.9871   1.00000
Prevalence             0.04440   0.08622    0.1528   0.07744
Detection Rate         0.04130   0.06195    0.1420   0.07744
Detection Prevalence   0.04233   0.08312    0.1606   0.07744
Balanced Accuracy      0.96458   0.84770    0.9536   1.00000

Representación de Matriz de confusión

# Confusion-matrix counts in long format (Prediction, Reference, Freq).
cm_d <- as.data.frame(cm$table)

# Overall statistics (Accuracy, Kappa, ...), rounded to 2 decimals.
cm_st <- data.frame(cm$overall)
cm_st$cm.overall <- round(cm_st$cm.overall, 2)

# Per-cell percentage of the total, attached next to the raw counts.
cm_p <- as.data.frame(prop.table(cm$table))
cm_d$Perc <- round(cm_p$Freq * 100, 2)

# Draw the matrix as a heatmap. Fix: `Perc` and `cm_st` were computed but
# never displayed; the cell label now shows count + percentage and the
# subtitle reports the overall accuracy.
ggplot(data = cm_d, aes(x = Prediction, y = Reference, fill = Freq)) +
  geom_tile(color = "white", lwd = 0.5) +
  geom_text(aes(label = paste0(Freq, "\n", Perc, "%")), color = 'white', size = 3.5) +
  theme_minimal() +
  ggtitle("Matriz de Confusión",
          subtitle = paste0("Accuracy: ", cm_st["Accuracy", "cm.overall"]))

Predicción y visualización del modelo 1.

classi_rf_model_roma <- raster::predict(roma_rst_lan, model=rf_model_roma)
plot(classi_rf_model_roma, main="Clasificación Random Forest Modelo 1",col = cols)

Predicción y visualización del modelo 2.

classi_rf_model_roma2 <- raster::predict(roma_rst_lan, model=rf_model_roma2)
plot(classi_rf_model_roma2, main="Clasificación Random Forest Modelo 2",col = cols)

---
title: "Clasificación de imágenes con Random Forest"
output: html_notebook
---

# Librerías aplicadas

```{r}
library(caret)
library(raster)
```

# Carga de las bandas de imágenes satelitales.

```{r}
# Scene directory and Landsat 8 scene id, factored out so the long path is
# written only once (the original repeated it nine times).
scene_dir <- "/Users/elianaalizchuquillanquijulcapari/Documents/workspace-r/demos/capturas-landsat/roma/LC81910312013208LGN00"
scene_id  <- "LC81910312013208LGN00"

# Full path of one band's GeoTIFF given its band number.
band_path <- function(b) file.path(scene_dir, sprintf("%s_B%d.TIF", scene_id, b))

band1  <- raster(band_path(1))
band2  <- raster(band_path(2))
band3  <- raster(band_path(3))
band4  <- raster(band_path(4))
band5  <- raster(band_path(5))
band6  <- raster(band_path(6))
band7  <- raster(band_path(7))
band10 <- raster(band_path(10))
band11 <- raster(band_path(11))

# Stack the nine bands into one multi-layer brick and name each layer.
roma_rst_lan <- brick(band1,band2,band3,band4,band5,band6,band7,band10,band11)
names(roma_rst_lan) <- c(paste0("Banda", 1:7), "Banda10", "Banda11")
```

# Procesado de datos

```{r}
# Load the ground-truth training samples (LCZ label raster for Rome).
rstTrainroma <- raster("/Users/elianaalizchuquillanquijulcapari/Documents/workspace-r/demos/DataTraining/rome_lcz_GT.tif")

# Zero marks background pixels: recode to NA so they are ignored downstream.
rstTrainroma[rstTrainroma==0] <- NA

# Declare the raster categorical (builds a raster attribute table).
rstTrainroma <- ratify(rstTrainroma)

# Rename the layer so the extracted sample table gets a `layer` column.
names(rstTrainroma) <- "layer"

# Pixel count per class among the labelled pixels.
table(values(rstTrainroma))

# Pair each labelled pixel with its 9 band values; drop rows containing NA.
rstDFlanroma.df <- na.omit(values(stack(rstTrainroma, roma_rst_lan)))

# Plot the training samples (10 colours — one per class present in the raster).
cols <- c("#fde725","#b5de2b", "#6ece58","#35b779", "#1f9e89","#26828e", "#31688e", "#3e4989", "#482878", "#440154")
plot(rstTrainroma, main="Muestras de entrenamiento de Roma", col = cols)


```

# Fase de Clasificación

```{r}
# First execution only: persist the extracted samples to CSV; on later runs
# the data frame is read back from disk instead.
#write.csv(rstDFlanroma.df,"rstDFRoma.csv", row.names = FALSE)

# Read the samples (column `layer` = class label, remaining columns = bands).
samplesLanroma <- read.csv("rstDFRoma.csv")

# Stratified 70/30 split: 70% of each class for training, the rest for
# evaluation. NOTE(review): call set.seed() beforehand for a reproducible split.
trainx <- vector("list", 17)
evalx  <- vector("list", 17)
for (i in 1:17) { # iterate over every possible category id
  cls  <- samplesLanroma[samplesLanroma$layer == i, ]
  smpl <- floor(0.70 * nrow(cls))
  tt   <- sample(seq_len(nrow(cls)), size = smpl)
  trainx[[i]] <- cls[tt, ]
  # Fix: when `tt` is empty, the original `cls[-tt, ]` selected ZERO rows
  # (x[-integer(0), ] is x[integer(0), ]), silently dropping every sample of
  # a class too small to split. setdiff() keeps all unsampled rows correctly.
  evalx[[i]] <- cls[setdiff(seq_len(nrow(cls)), tt), ]
}

# Bind the per-class pieces into the training and evaluation data frames.
trn <- do.call(rbind, trainx)
eva <- do.call(rbind, evalx)
```

# Algoritmo Random Forest

```{r}
# Training control: 10-fold cross-validation, repeated 5 times.
fitControl <- trainControl(method = "repeatedcv", # repeated cross-validation of the training data
                   number = 10, # number of folds
                   repeats = 5) # number of complete CV repetitions


# Model 1: random forest; caret evaluates 8 candidate mtry values (tuneLength).
# Predictors = band columns 2..ncol(trn); response = the class label, coerced
# to factor so caret treats this as classification. Bands are centred/scaled.
rf_model_roma<- caret::train(x = trn[,(2:ncol(trn))], y = as.factor(trn$layer),
                        method = "rf", 
                        metric="Accuracy", 
                        trControl = fitControl,
                        preProcess = c("center", "scale"),
                        tuneLength  = 8)

# Model 2: identical setup, but with an explicit mtry grid of 1..20.
rf_grid <- expand.grid(mtry=1:20)

rf_model_roma2<- caret::train(x = trn[,(2:ncol(trn))], y = as.factor(trn$layer),
                        method = "rf", 
                        metric="Accuracy", 
                        trControl = fitControl,
                        preProcess = c("center", "scale"),
                        tuneGrid = rf_grid)

```

# Exploración del modelo

```{r}
# Model 1: CV results for every mtry tried and the selected final value.
print(rf_model_roma)

# Underlying randomForest fit: OOB error estimate and confusion matrix.
print(rf_model_roma$finalModel)

# Accuracy vs. mtry curve for model 1.
plot(rf_model_roma)

# Model 2: same summaries for the explicit mtry grid.
print(rf_model_roma2)

print(rf_model_roma2$finalModel)

plot(rf_model_roma2)
```


# Evaluación del modelo 1.

```{r}
# Predict the held-out evaluation rows; column 1 (`layer`, the true label)
# is dropped so only the band predictors are passed.
pred_rf_model_roma<- predict(rf_model_roma,newdata = eva[,-1])


# Confusion matrix of model-1 predictions against the true labels.
cm <- confusionMatrix(data = pred_rf_model_roma, as.factor(eva$layer))
cm
```

# Representación de Matriz de confusión

```{r}
# Confusion-matrix counts in long format (Prediction, Reference, Freq).
cm_d <- as.data.frame(cm$table)

# Overall statistics (Accuracy, Kappa, ...), rounded to 2 decimals.
cm_st <- data.frame(cm$overall)
cm_st$cm.overall <- round(cm_st$cm.overall, 2)

# Per-cell percentage of the total, attached next to the raw counts.
cm_p <- as.data.frame(prop.table(cm$table))
cm_d$Perc <- round(cm_p$Freq * 100, 2)

# Draw the matrix as a heatmap. Fix: `Perc` and `cm_st` were computed but
# never displayed; the cell label now shows count + percentage and the
# subtitle reports the overall accuracy.
ggplot(data = cm_d, aes(x = Prediction, y = Reference, fill = Freq)) +
  geom_tile(color = "white", lwd = 0.5) +
  geom_text(aes(label = paste0(Freq, "\n", Perc, "%")), color = 'white', size = 3.5) +
  theme_minimal() +
  ggtitle("Matriz de Confusión",
          subtitle = paste0("Accuracy: ", cm_st["Accuracy", "cm.overall"]))
```

# Evaluación del modelo 2

```{r}
# Predict the held-out evaluation rows with model 2; column 1 (`layer`,
# the true label) is dropped so only the band predictors are passed.
pred_rf_model_roma2<- predict(rf_model_roma2,newdata = eva[,-1])


# Confusion matrix of model-2 predictions against the true labels.
cm <- confusionMatrix(data = pred_rf_model_roma2, as.factor(eva$layer))
cm
```

# Representación de Matriz de confusión

```{r}
# Confusion-matrix counts in long format (Prediction, Reference, Freq).
cm_d <- as.data.frame(cm$table)

# Overall statistics (Accuracy, Kappa, ...), rounded to 2 decimals.
cm_st <- data.frame(cm$overall)
cm_st$cm.overall <- round(cm_st$cm.overall, 2)

# Per-cell percentage of the total, attached next to the raw counts.
cm_p <- as.data.frame(prop.table(cm$table))
cm_d$Perc <- round(cm_p$Freq * 100, 2)

# Draw the matrix as a heatmap. Fix: `Perc` and `cm_st` were computed but
# never displayed; the cell label now shows count + percentage and the
# subtitle reports the overall accuracy.
ggplot(data = cm_d, aes(x = Prediction, y = Reference, fill = Freq)) +
  geom_tile(color = "white", lwd = 0.5) +
  geom_text(aes(label = paste0(Freq, "\n", Perc, "%")), color = 'white', size = 3.5) +
  theme_minimal() +
  ggtitle("Matriz de Confusión",
          subtitle = paste0("Accuracy: ", cm_st["Accuracy", "cm.overall"]))
```

# Predicción y visualización del modelo 1.

```{r}
# Classify every pixel of the 9-band brick with model 1 and plot the map.
classi_rf_model_roma <- raster::predict(roma_rst_lan, model=rf_model_roma)
plot(classi_rf_model_roma, main="Clasificación Random Forest Modelo 1",col = cols)
```
# Predicción y visualización del modelo 2.

```{r}
# Classify every pixel of the 9-band brick with model 2 and plot the map.
classi_rf_model_roma2 <- raster::predict(roma_rst_lan, model=rf_model_roma2)
plot(classi_rf_model_roma2, main="Clasificación Random Forest Modelo 2",col = cols)
```


