Analysis
1 Libraries
To reproduce the examples of this material, the R packages the following packages are needed.
library(EnvRtype)
library(rio)
library(ggh4x)
library(ggridges)
library(tidyverse)
library(metan)
library(lmerTest)
library(lme4)
library(broom)
library(broom.mixed)
<-
my_theme theme_bw() +
theme(panel.spacing = unit(0, "cm"),
panel.grid = element_blank(),
legend.position = "bottom")
2 Datasets
2.1 Traits
<-
df_traits import("https://bit.ly/df_barley") |>
as_factor(1:6)
2.2 Climate variables
2.2.1 Scripts to gather data
<- import("https://bit.ly/3RvXlgj")
df_env <- df_env$env |> as_character()
ENV <- df_env$Lat
LAT <- df_env$Lon
LON <- df_env$Alt
ALT <- df_env$Sowing
START <- df_env$Harvesting
END
# see more at https://github.com/allogamous/EnvRtype
<-
df_climate get_weather(env.id = ENV,
lat = LAT,
lon = LON,
start.day = START,
end.day = END)
# GDD: Growing Degree Day (oC/day)
# FRUE: Effect of temperature on radiation use efficiency (from 0 to 1)
# T2M_RANGE: Daily Temperature Range (oC day)
# SPV: Slope of saturation vapour pressure curve (kPa.Celsius)
# VPD: Vapour pressure deficit (kPa)
# ETP: Potential Evapotranspiration (mm.day)
# PEPT: Deficit by Precipitation (mm.day)
# n: Actual duration of sunshine (hour)
# N: Daylight hours (hour)
# RTA: Extraterrestrial radiation (MJ/m^2/day)
# SRAD: Solar radiation (MJ/m^2/day)
# T2M: Temperature at 2 Meters
# T2M_MAX: Maximum Temperature at 2 Meters
# T2M_MIN: Minimum Temperature at 2 Meters
# PRECTOT: Precipitation
# WS2M: Wind Speed at 2 Meters
# RH2M: Relative Humidity at 2 Meters
# T2MDEW: Dew/Frost Point at 2 Meters
# ALLSKY_SFC_LW_DWN: Downward Thermal Infrared (Longwave) Radiative Flux
# ALLSKY_SFC_SW_DWN: All Sky Insolation Incident on a Horizontal Surface
# ALLSKY_TOA_SW_DWN: Top-of-atmosphere Insolation
# [1] "env" "ETP" "GDD" "PETP" "RH2M" "SPV"
# [8] "T2M" "T2M_MAX" "T2M_MIN" "T2M_RANGE" "T2MDEW" "VPD"
# Compute other parameters
<-
env_data %>%
df_climate as.data.frame() %>%
param_temperature(Tbase1 = 5, # choose the base temperature here
Tbase2 = 28, # choose the base temperature here
merge = TRUE) %>%
param_atmospheric(merge = TRUE) %>%
param_radiation(merge = TRUE)
2.2.2 Tidy climate data
<- import("https://bit.ly/env_data_nasapower")
env_data str(env_data)
## 'data.frame': 337 obs. of 19 variables:
## $ env : int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ LON : num 30.5 30.5 30.5 30.5 30.5 ...
## $ LAT : num 31.1 31.1 31.1 31.1 31.1 ...
## $ YEAR : int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ MM : int 11 11 11 11 11 11 11 11 11 11 ...
## $ DD : int 12 13 14 15 16 17 18 19 20 21 ...
## $ DOY : int 316 317 318 319 320 321 322 323 324 325 ...
## $ YYYYMMDD : chr "12/11/2019" "13/11/2019" "14/11/2019" "15/11/2019" ...
## $ daysFromStart: int 1 2 3 4 5 6 7 8 9 10 ...
## $ tmean : num 24.6 24.4 23.2 22.4 21.4 ...
## $ tmax : num 33.5 32.8 29.9 27.6 26.9 ...
## $ tmin : num 18 17.1 17.9 19.1 17.2 ...
## $ prec : num 0 0 0.05 0.06 0.06 0.01 0 0 0 0 ...
## $ rh : num 49.1 53.8 51.1 60.6 64.4 ...
## $ trange : num 15.52 15.68 11.97 8.45 9.77 ...
## $ vpd : num 2.42 2.11 1.79 1.38 1.17 ...
## $ svpc : num 0.196 0.188 0.178 0.173 0.162 ...
## $ etp : num 6.77 6.58 6.28 6.23 6.09 ...
## $ dbp : num -6.77 -6.58 -6.23 -6.17 -6.03 ...
<- names(env_data)[10:19] id_var
3 Scripts
3.1 Correlation analysis of climate variables
|>
env_data select_cols(all_of(id_var)) |>
corr_coef() |>
network_plot(min_cor = 0.3,
legend_position = "bottom")
ggsave("figs/fig1_network.png", width = 5, height = 5)
3.2 Distribution of climate variables
# long format for climate data
<-
env_data_d |>
env_data remove_cols(LON:YYYYMMDD, daysFromStart) |>
pivot_longer(-env)
# long format for grain yield
<-
df_gy_dist |>
df_traits select(YEARS, GY) |>
mutate(name = "GY", .after = YEARS) |>
rename(value = GY,
env = YEARS)
# bind climate and GY
<- rbind(df_gy_dist, env_data_d)
env_data_d # mean values for each trait
<- means_by(env_data_d, name)
env_data_mean
ggplot(env_data_d, aes(x = value, y = env, fill = env)) +
geom_density_ridges(scale = 0.9) +
geom_vline(data = env_data_mean,
aes(xintercept = value),
linetype = 2) +
facet_grid(~name, scales = "free") +
theme(panel.grid.minor = element_blank(),
legend.position = "bottom",
legend.title = element_blank()) +
scale_y_discrete(expand = expansion(c(0, 0.05))) +
labs(x = "Observed value",
y = "Density",
fill = "") +
my_theme
ggsave("figs/fig2_density_climate.png", width = 12, height = 5)
3.3 Environmental tipology
<- c('1-initial growing',
names.window '2-Tillering',
'3-Booting',
'4-Heading/Flowering',
"5-Grain filling",
"6-Physiological maturity")
<-
out env_typing(env.data = env_data,
env.id = "env",
var.id = c("trange", "tmax", "tmin", "dbp", "etp", "vpd"),
by.interval = TRUE,
time.window = c(0, 15, 35, 65, 90, 140),
names.window = names.window)
<-
out2 separate(out,
env.variable,into = c("var", "freq"),
sep = "_",
extra = "drop")
3.3.1 tmax
# plot the distribution of envirotypes for dbp
<-
p1 |>
out2 subset(var == "tmax") |> # change the variable here
ggplot() +
geom_bar(aes(x=Freq, y=interval,fill=freq),
position = "fill",
stat = "identity",
width = 1,
color = "white",
size=.2)+
facet_grid(~env, scales = "free", space = "free")+
scale_y_discrete(expand = c(0,0))+
scale_x_continuous(expand = c(0,0))+
xlab('Relative frequency of the maximum temperature (ยบC)')+
ylab("Crop stage")+
labs(fill='Envirotype')+
theme(axis.title = element_text(size=12),
legend.text = element_text(size=9),
strip.text = element_text(size=12),
legend.title = element_text(size=12),
strip.background = element_rect(fill="gray95",size=1),
legend.position = 'bottom')
3.3.2 vpd
# plot the distribution of envirotypes for dbp
<-
p2 |>
out2 subset(var == "vpd") |> # change the variable here
ggplot() +
geom_bar(aes(x=Freq, y=interval,fill=freq),
position = "fill",
stat = "identity",
width = 1,
color = "white",
size=.2)+
facet_grid(~env, scales = "free", space = "free")+
scale_y_discrete(expand = c(0,0))+
scale_x_continuous(expand = c(0,0))+
xlab(expression(Relative~frequency~of~the~vapor~pressure~deficit~~(KPa~day^{-1})))+
ylab("Crop stage")+
labs(fill='Envirotype')+
theme(axis.title = element_text(size=12),
legend.text = element_text(size=9),
strip.text = element_text(size=12),
legend.title = element_text(size=12),
strip.background = element_rect(fill="gray95",size=1),
legend.position = 'bottom')
3.3.3 dbp
# plot the distribution of envirotypes for dbp
<-
p3 |>
out2 subset(var == "dbp") |> # change the variable here
ggplot() +
geom_bar(aes(x=Freq, y=interval,fill=freq),
position = "fill",
stat = "identity",
width = 1,
color = "white",
size=.2)+
facet_grid(~env, scales = "free", space = "free")+
scale_y_discrete(expand = c(0,0))+
scale_x_continuous(expand = c(0,0))+
xlab(expression(Relative~frequency~of~the~rainfall~deficit~~(mm~day^{-1})))+
ylab("Crop stage")+
labs(fill='Envirotype')+
theme(axis.title = element_text(size=12),
legend.text = element_text(size=9),
strip.text = element_text(size=12),
legend.title = element_text(size=12),
strip.background = element_rect(fill="gray95",size=1),
legend.position = 'bottom')
arrange_ggplot(p1, p2, p3,
ncol = 1,
tag_levels = "a")
![Quantiles for maximum temperature (a), vapor pressure deficit (b), and deficit by precipitation (c) across distinct crop stages of barley growing in two crop seasons.](code_files/figure-html/unnamed-chunk-11-1.png)
Figure 3.1: Quantiles for maximum temperature (a), vapor pressure deficit (b), and deficit by precipitation (c) across distinct crop stages of barley growing in two crop seasons.
ggsave("figs/fig3_typology.png", width = 10, height = 9)
4 Analysis
4.1 Model
# Pivot longer to analyze each variable with group_by()
<-
df_long |>
df_traits pivot_longer(CAN:GY)
<-
models %>%
df_long group_by(YEARS, name) %>%
doo(~lmer(value ~ STRESS + REP + (1|GENOTYPES) + (1|GENOTYPES:STRESS) + (1|REP:IBLOCK), data = .))
4.2 LRT
<-
LRT |>
models mutate(data = map(data,
~.x %>%
::ranova(., reduce.terms = TRUE) |>
lmerTestrownames_to_column("term"))) |>
unnest(data)
4.3 Variance components
<-
vcomp |>
models mutate(data = map(data,
~.x %>%
::VarCorr() |>
lme4as.data.frame() |>
add_cols(Percent = (vcov / sum(vcov)) * 100))) |>
unnest(data)
ggplot(vcomp, aes(name, vcov, fill = grp)) +
geom_bar(stat = "identity",
position = "fill",
color = "black",
width = 1) +
facet_wrap(~YEARS) +
geom_text(aes(label = paste0(round(Percent, 2), "%")),
position = position_fill(vjust = .5),
size = 3) +
scale_y_continuous(expand = expansion(c(0, 0)),
labels = function(x) paste0(x*100, "%")) +
scale_x_discrete(expand = expansion(0)) +
theme_bw()+
theme(legend.position = "bottom",
axis.ticks.length = unit(0.2, "cm"),
panel.grid = element_blank(),
legend.title = element_blank(),
strip.background = element_rect(fill = NA),
text = element_text(colour = "black"),
axis.text = element_text(colour = "black")) +
labs(x = "Assessed traits",
y = "Proportion of phenotypic variance")
ggsave("figs/fig4_vcomp.png", width = 8, height = 6)
5 BLUPs for Genotype x Stress by Year
<-
blups |>
models mutate(data = map(data,
~.x %>%
augment())) |>
unnest(data) |>
group_by(YEARS, name, STRESS, GENOTYPES) |>
summarise(blup = mean(.fitted)) |>
ungroup() |>
pivot_wider(names_from = name, values_from = blup)
blups
5.1 correlation between traits
<-
cor_di |>
blups filter(STRESS == "DI") |>
corr_coef(CAN:PH) |>
network_plot(legend_position = "bottom", show = "all")
<-
cor_fi |>
blups filter(STRESS == "FI") |>
corr_coef(CAN:PH) |>
network_plot(legend_position = "bottom", show = "all")
arrange_ggplot(cor_di, cor_fi,
guides = "collect",
tag_levels = "a")
![Correlation between traits under deficit irrigation (a) and full irrigation (b)](code_files/figure-html/unnamed-chunk-16-1.png)
Figure 5.1: Correlation between traits under deficit irrigation (a) and full irrigation (b)
ggsave("figs/fig5_network_traits.png", width = 8, height = 5)
6 MGIDI appied to each environment
<- split_factors(blups, YEARS, STRESS) blups_ys
6.1 2019 DI
<-
mgidi_2019_di $`2019 | DI` |>
blups_yscolumn_to_rownames("GENOTYPES") |>
mgidi(ideotype = c("l, h, h, h, l, h"),
weights = c(1,1,1,5,1,2), # remove to consider equal heigths
SI = 5,
verbose = FALSE)
6.2 2019 FU
<-
mgidi_2019_fi $`2019 | FI` |>
blups_yscolumn_to_rownames("GENOTYPES") |>
mgidi(ideotype = c("l, h, h, h, l, h"),
weights = c(1,1,1,5,1,2), # remove to consider equal heigths
SI = 5,
verbose = FALSE)
6.3 2020 DI
<-
mgidi_2020_di $`2020 | DI` |>
blups_yscolumn_to_rownames("GENOTYPES") |>
mgidi(ideotype = c("l, h, h, h, l, h"),
weights = c(1,1,1,5,1,2), # remove to consider equal heigths
SI = 5,
verbose = FALSE)
6.4 2020 FU
<-
mgidi_2020_fi $`2020 | FI` |>
blups_yscolumn_to_rownames("GENOTYPES") |>
mgidi(ideotype = c("l, h, h, h, l, h"),
weights = c(1,1,1,5,1,2), # remove to consider equal heigths
SI = 5,
verbose = FALSE)
6.5 Selected genotypes
<- mgidi_2019_di |> sel_gen()
sel_2019DI <- mgidi_2019_fi |> sel_gen()
sel_2019FI <- mgidi_2020_di |> sel_gen()
sel_2020DI <- mgidi_2020_fi |> sel_gen()
sel_2020FI
<-
v1 venn_plot(sel_2019DI, sel_2020DI,
sel_2019FI, sel_2020FI,names = c("2019DI", "2020DI", "2019FI", "2020FI"))
# Selected in DI
<- set_union(sel_2019DI, sel_2020DI)
DI # Selected in FI
<- set_union(sel_2019FI, sel_2020FI)
FI
<- venn_plot(DI, FI)
v2 arrange_ggplot(v1, v2,
tag_levels = "a")
ggsave("figs/fig6_venn.png", width = 12, height = 5)
# only in DI (23)
set_difference(DI, FI)
## [1] "08BA_82" "08AB_09" "06MT_91" "09BA_21" "08MT_63" "09AB_10" "07UT_46"
## [8] "06WA_38" "08BA_95" "08MT_15" "07WA_03" "08MT_77" "08AB_46" "08AB_06"
## [15] "06MT_78" "06WA_53" "08WA_28" "08N2_66" "07UT_55" "09N2_74" "06WA_63"
## [22] "08WA_86" "07UT_14"
# only in FI (18)
set_difference(FI, DI)
## [1] "09AB_89" "07AB_66" "08MT_68" "07AB_77" "07UT_94" "06MT_46" "07MT_43"
## [8] "09AB_25" "09WA_19" "07BA_89" "09MT_16" "07UT_48" "08N6_90" "09AB_94"
## [15] "09AB_78" "06N2_39" "07UT_93" "08MT_10"
# in all environments
set_intersect(sel_2019DI, sel_2020DI, sel_2019FI, sel_2020FI)
## [1] "09UT_24"
6.6 MGIDI index for the selected genotypes
<-
m1 gmd(mgidi_2019_di, "MGIDI") |>
mutate(selected = ifelse(Genotype %in% set_union(sel_2019DI, sel_2020DI,
sel_2019FI, sel_2020FI),"Yes",
"No")) |>
subset(selected == "Yes") |>
ggplot(aes(x = MGIDI, y = reorder(Genotype, -MGIDI))) +
geom_line(aes(group = 1)) +
geom_point(size = 2.5) +
labs(x = "Multi-trait Genotype-Ideotype Distance Index",
y = "Genotypes") +
my_theme
<-
m2 gmd(mgidi_2019_fi, "MGIDI") |>
mutate(selected = ifelse(Genotype %in% set_union(sel_2019DI, sel_2020DI,
sel_2019FI, sel_2020FI),"Yes",
"No")) |>
subset(selected == "Yes") |>
ggplot(aes(x = MGIDI, y = reorder(Genotype, -MGIDI))) +
geom_line(aes(group = 1)) +
geom_point(size = 2.5) +
labs(x = "Multi-trait Genotype-Ideotype Distance Index",
y = "Genotypes") +
my_theme
<-
m3 gmd(mgidi_2020_di, "MGIDI") |>
mutate(selected = ifelse(Genotype %in% set_union(sel_2019DI, sel_2020DI,
sel_2019FI, sel_2020FI),"Yes",
"No")) |>
subset(selected == "Yes") |>
ggplot(aes(x = MGIDI, y = reorder(Genotype, -MGIDI))) +
geom_line(aes(group = 1)) +
geom_point(size = 2.5) +
labs(x = "Multi-trait Genotype-Ideotype Distance Index",
y = "Genotypes") +
my_theme
<-
m4 gmd(mgidi_2020_fi, "MGIDI") |>
mutate(selected = ifelse(Genotype %in% set_union(sel_2019DI, sel_2020DI,
sel_2019FI, sel_2020FI),"Yes",
"No")) |>
subset(selected == "Yes") |>
ggplot(aes(x = MGIDI, y = reorder(Genotype, -MGIDI))) +
geom_line(aes(group = 1)) +
geom_point(size = 2.5) +
labs(x = "Multi-trait Genotype-Ideotype Distance Index",
y = "Genotypes") +
my_theme
arrange_ggplot(m1, m2, m3, m4,
ncol = 4,
tag_levels = "a")
![Selected genotypes ranked by the MGIDI in the environments 2019DI (a), 2020DI (b), 2019FI (c), and 2020FI (d)](code_files/figure-html/unnamed-chunk-23-1.png)
Figure 6.1: Selected genotypes ranked by the MGIDI in the environments 2019DI (a), 2020DI (b), 2019FI (c), and 2020FI (d)
ggsave("figs/fig7_selected.png", width = 17, height = 8)
6.7 Selection gains
<-
sel_gain list(
a = gmd(mgidi_2019_di) |> mutate(YEAR = 2019, STRESS = "Deficit irrigation"),
b = gmd(mgidi_2019_fi) |> mutate(YEAR = 2019, STRESS = "Full irrigation"),
c = gmd(mgidi_2020_di) |> mutate(YEAR = 2020, STRESS = "Deficit irrigation"),
d = gmd(mgidi_2020_fi) |> mutate(YEAR = 2020, STRESS = "Full irrigation")
|>
) bind_rows() |>
mutate(negative = ifelse(SDperc <= 0, "Negative", "Positive"))
library(ggh4x)
ggplot(sel_gain, aes(VAR, SDperc)) +
geom_hline(yintercept = 0, size = 0.2) +
geom_col(aes(fill = negative),
col = "black",
size = 0.2) +
scale_y_continuous(expand = expansion(mult = 0.1)) +
facet_nested(~YEAR + STRESS +sense, scales = "free", space = "free") +
geom_text(aes(label = round(SDperc, 2),
hjust = ifelse(SDperc > 0, -0.1, 1.1),
angle = 90),
size = 3) +
labs(x = "Traits",
y = "Selection gains (%)") +
+
my_theme theme(legend.position = "bottom",
legend.title = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_rect(color = "black",
fill = "white"))
ggsave("figs/fig8_selection_gains.png", width = 12, height = 6)
7 Section info
sessionInfo()
## R version 4.2.0 (2022-04-22 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22000)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=Portuguese_Brazil.utf8 LC_CTYPE=Portuguese_Brazil.utf8
## [3] LC_MONETARY=Portuguese_Brazil.utf8 LC_NUMERIC=C
## [5] LC_TIME=Portuguese_Brazil.utf8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] broom.mixed_0.2.9.4 broom_1.0.0 lmerTest_3.1-3
## [4] lme4_1.1-30 Matrix_1.4-1 metan_1.17.0.9000
## [7] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.9
## [10] purrr_0.3.4 readr_2.1.2 tidyr_1.2.0
## [13] tibble_3.1.7 tidyverse_1.3.1 ggridges_0.5.3
## [16] ggh4x_0.2.1 ggplot2_3.3.6 rio_0.5.29
## [19] EnvRtype_1.1.0
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-157 fs_1.5.2 lubridate_1.8.0
## [4] RColorBrewer_1.1-3 httr_1.4.3 numDeriv_2016.8-1.1
## [7] tools_4.2.0 backports_1.4.1 bslib_0.3.1
## [10] utf8_1.2.2 R6_2.5.1 DBI_1.1.3
## [13] colorspace_2.0-3 withr_2.5.0 tidyselect_1.1.2
## [16] GGally_2.1.2 curl_4.3.2 compiler_4.2.0
## [19] textshaping_0.3.6 cli_3.3.0 rvest_1.0.2
## [22] xml2_1.3.3 labeling_0.4.2 bookdown_0.27
## [25] sass_0.4.1 scales_1.2.0 systemfonts_1.0.4
## [28] digest_0.6.29 minqa_1.2.4 foreign_0.8-82
## [31] rmarkdown_2.14 pkgconfig_2.0.3 htmltools_0.5.2
## [34] parallelly_1.32.0 highr_0.9 dbplyr_2.2.1
## [37] fastmap_1.1.0 rlang_1.0.3 readxl_1.4.0
## [40] rstudioapi_0.13 jquerylib_0.1.4 generics_0.1.3
## [43] farver_2.1.1 jsonlite_1.8.0 zip_2.2.0
## [46] magrittr_2.0.3 patchwork_1.1.1 Rcpp_1.0.8.3
## [49] munsell_0.5.0 fansi_1.0.3 furrr_0.3.1
## [52] lifecycle_1.0.1 stringi_1.7.6 yaml_2.3.5
## [55] mathjaxr_1.6-0 MASS_7.3-57 plyr_1.8.7
## [58] grid_4.2.0 parallel_4.2.0 listenv_0.8.0
## [61] ggrepel_0.9.1 crayon_1.5.1 lattice_0.20-45
## [64] haven_2.5.0 splines_4.2.0 hms_1.1.1
## [67] knitr_1.39 pillar_1.8.0 boot_1.3-28
## [70] reshape2_1.4.4 codetools_0.2-18 reprex_2.0.1
## [73] glue_1.6.2 evaluate_0.15 data.table_1.14.2
## [76] modelr_0.1.8 foreach_1.5.2 nloptr_2.0.3
## [79] vctrs_0.4.1 rmdformats_1.0.4 tzdb_0.3.0
## [82] tweenr_1.0.2 cellranger_1.1.0 gtable_0.3.0
## [85] polyclip_1.10-0 future_1.26.1 reshape_0.8.9
## [88] assertthat_0.2.1 xfun_0.31 ggforce_0.3.3
## [91] openxlsx_4.2.5 ragg_1.2.2 iterators_1.0.14
## [94] globals_0.15.1 ellipsis_0.3.2