10. Um método simples e indireto para estimação da área de folha de linho

% Analysis

1 Libraries

To reproduce the examples of this material, the R packages the following packages are needed.

2 Data


df_af <- rio::import("data/leaf_flax_pred.xlsx")
descritiva <- 
  df_af |> 
  summarise(across(area:width, .fns = list(media = mean, min = min, max = max)))

descritiva
##   area_media   area_min area_max length_media length_min length_max width_media
## 1  0.5909433 0.04281334 1.907076     2.142723   0.511989    4.37025   0.3322159
##    width_min width_max
## 1 0.09535828 0.5995659

p1 <- 
ggplot(df_af, aes(length, area)) + 
  geom_point( size = 3, alpha = 0.5, color = "brown") +
  labs(x = "Comprimento da folha (cm)",
       y = expression(Área~foliar~(cm^2~folha^{-1}))) +
  theme_bw(base_size = 16)
ggMarginal(p1, color = "brown", size = 7, linewidth = 1)


p2 <- 
ggplot(df_af, aes(width, area)) + 
  geom_point( size = 3, alpha = 0.5, color = "brown") +
  labs(x = "Largura da folha (cm)",
       y = expression(Área~foliar~(cm^2~folha^{-1}))) +
  theme_bw(base_size = 16)
ggMarginal(p2, color = "brown", size = 7, linewidth = 1)



# functions
# concordance correlation coefficient
get_ccc <- function(df, predicted, real){
  if(is.grouped_df(df)){
    df %>% 
      group_modify(~get_ccc(.x, {{predicted}}, {{real}})) %>% 
      ungroup()
  } else{
    predicted <- pull(df, {{predicted}})
    real <- pull(df, {{real}})
    cor <- CCC(real, predicted, na.rm = TRUE)
    data.frame(r = cor(real, predicted),
               pc = cor$rho.c[[1]],
               lwr_ci = cor$rho.c[[2]],
               upr_ci = cor$rho.c[[3]],
               bc = cor$C.b)
  }
}

3 Model


control <-
  trainControl(method = 'cv',
               p = 0.7,
               number = 10,
               verboseIter = TRUE,
               savePredictions = "all")

fit <- train(area ~  length + length:width ,
             method = "lm",
             data = df_af,
             trControl = control)
## + Fold01: intercept=TRUE 
## - Fold01: intercept=TRUE 
## + Fold02: intercept=TRUE 
## - Fold02: intercept=TRUE 
## + Fold03: intercept=TRUE 
## - Fold03: intercept=TRUE 
## + Fold04: intercept=TRUE 
## - Fold04: intercept=TRUE 
## + Fold05: intercept=TRUE 
## - Fold05: intercept=TRUE 
## + Fold06: intercept=TRUE 
## - Fold06: intercept=TRUE 
## + Fold07: intercept=TRUE 
## - Fold07: intercept=TRUE 
## + Fold08: intercept=TRUE 
## - Fold08: intercept=TRUE 
## + Fold09: intercept=TRUE 
## - Fold09: intercept=TRUE 
## + Fold10: intercept=TRUE 
## - Fold10: intercept=TRUE 
## Aggregating results
## Fitting final model on full training set
print(fit)
## Linear Regression 
## 
## 3522 samples
##    2 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 3170, 3169, 3170, 3170, 3170, 3170, ... 
## Resampling results:
## 
##   RMSE        Rsquared   MAE       
##   0.01418923  0.9982461  0.01148978
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
mod <- fit$finalModel
check_model(mod)

4 Predictions

library(ggpubr)

df_af <- 
  df_af |> 
  mutate(pred = predict(fit$finalModel))
get_ccc(df_af, area, pred)
##           r        pc    lwr_ci    upr_ci        bc
## 1 0.9991208 0.9991204 0.9990604 0.9991766 0.9999996


# 1:1 concordance plot
ggplot(df_af, aes(area, pred)) +
  geom_point(alpha = 0.2, color = "brown", size = 3) +
  geom_abline(intercept = 0, slope = 1, color = "red", linewidth = 1) +
  coord_equal() +
  xlim(c(0, 2)) +
  ylim(c(0, 2)) +
  labs(y = expression(Área~foliar~observada~(cm^2~folha^{-1})),
       x = expression(Área~foliar~predita~(cm^2~folha^{-1}))) +
  theme_bw(base_size = 16)


ggsave("figs/pred_af.jpg", dpi = 600)