Commit 1056078d authored by luroth's avatar luroth
Browse files

Removed history, due to sensitive data

parents
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
library(dplyr)
# Function to extract temperature course for specific site and time interval
extract_covariate_course <- function(df, site_id, from, to) {
df_ <- df %>% filter(site.id == site_id)
df_ <- df_ %>% filter(timestamp > from, timestamp <= to)
return(df_)
}
\ No newline at end of file
###### Growth response functions
#### Loss function for optimization
loss_function <- function(par, fixed_par, df_, .function) {
if ("sigma_error" %in% names(par)) {
sigma_error <- par[['sigma_error']]
par <- within(as.list(par), rm(sigma_error))
} else {
sigma_error <- fixed_par[['sigma_error']]
fixed_par <- within(as.list(fixed_par), rm(sigma_error))
}
if (sigma_error < 0) {
deviance <- 10000000
} else {
data_y <- df_$value_delta
data_x <- do.call(.function, args = c(list(x_list = df_$temps, x2_list = df_$irrs), par, fixed_par))
likelihoods <- dnorm(data_y, mean = data_x, sd = sigma_error)
log.likelihoods <- log(likelihoods)
deviance <- -2 * sum(log.likelihoods)
}
if (is.infinite(deviance)) {
deviance <- 10000000
}
return(deviance)
}
optimize_ <- function(df_, .function, par, lower = NA, upper = NA, fixed_par, prefix) {
par <- par[!names(par) %in% names(fixed_par)]
if (any(is.na(lower)) & any(is.na(upper))) {
parameter.fits <- optim(par = par,
fn = loss_function, hessian = T,
df_ = df_,
fixed_par = fixed_par,
.function = .function,
method = "BFGS",
control = list(trace = 0))
} else {
lower <- lower[!names(lower) %in% paste0("lower_", names(fixed_par))]
upper <- upper[!names(upper) %in% paste0("upper_", names(fixed_par))]
parameter.fits <- optim(par = par,
fn = loss_function, hessian = T,
df_ = df_,
fixed_par = fixed_par,
.function = .function,
method = "L-BFGS-B",
lower = lower,
upper = upper,
control = list(trace = 0))
}
if (parameter.fits$convergence %in% c(0, 51)) {
hessian <- parameter.fits$hessian
hessian.inv <- try(solve(hessian), silent = T)
if (any(class(hessian.inv) == "matrix")) {
parameter <- parameter.fits$par
parameter.se <- sqrt(diag(hessian.inv))
if ("sigma_error" %in% names(parameter)) {
parameter_for_pred <- within(as.list(parameter), rm(sigma_error))
fixed_par_for_pred <- fixed_par
} else {
parameter_for_pred <- parameter
fixed_par_for_pred <- within(as.list(fixed_par), rm(sigma_error))
}
residuals <- df_$value_delta -
do.call(.function, args = c(list(x_list = df_$temps, x2_list = df_$irrs), parameter_for_pred, fixed_par_for_pred))
RMSE <- sqrt(mean(residuals^2))
df_ <- data.frame(parameter = paste0(prefix, names(par)),
value = parameter,
se = parameter.se,
RMSE = RMSE)
} else {
df_ <- data.frame(parameter = paste0(prefix, names(par)),
value = NA,
se = NA,
RMSE = NA)
}
} else {
df_ <- data.frame(parameter = paste0(prefix, names(par)),
value = NA,
se = NA,
RMSE = NA)
}
return(df_)
}
## Start and borders of parameters for different models. Start values based on first run multi-year BLUEs
defaults_ <- list(
# General
lower_rmax = 0.01,
start_rmax = 0.666,
upper_rmax = 10,
lower_Tmin = -5,
start_Tmin = 4,
upper_Tmin = 10.649,
lower_Topt = 10.65,
upper_Topt = 24.149,
start_Topt = 20,
lower_Tmax = 24.15,
upper_Tmax = 50,
start_Tmax = 30,
# Linear model
lm_start_intercept = 0.0431,
lm_start_slope = 0.0404,
# Asym model
asym_lower_lrc = 0,
asym_start_lrc = 3.96,
asym_upper_lrc = 15,
# Sigma
lower_sigma_error = 0.0001,
start_sigma_error = 1,
upper_sigma_error = 100)
# Linear model
##############
# Base function
growth_response_linear <- function(x, intercept, slope) {
return(x * slope + intercept)
}
# Wrapper function to process temperature courses
growth_response_linear_l <- function(x_list, x2_list, intercept, slope) {
y <- lapply(x_list, growth_response_linear, intercept, slope)
y <- lapply(y, sum) %>% unlist()
return(y)
}
# Fit function
lm_fit <- function(df_, fixed_par) {
par <- c(intercept = defaults_[['lm_start_intercept']],
slope = defaults_[['lm_start_slope']],
sigma_error = defaults_[['start_sigma_error']])
df_ <- optimize_(df_, growth_response_linear_l, par, lower = NA, upper = NA, fixed_par, "lm_")
return(df_)
}
growth_response_bp_linear <- function(x, c0, cOpt, Asym) {
slope <- Asym / (cOpt - c0)
y <- (x - c0) * slope
y <- if_else(x > c0, y, 0)
y <- if_else(x < cOpt, y, Asym)
return(y)
}
# Wrapper function to process temperature courses
growth_response_bp_linear_l <- function(x_list, x2_list, c0, cOpt, Asym) {
y <- lapply(x_list, growth_response_bp_linear, c0, cOpt, Asym)
y <- lapply(y, sum) %>% unlist()
return(y)
}
# Fit function
bplm_fit <- function(df_, fixed_par) {
par <- c(cOpt = defaults_[['start_Topt']],
c0 = defaults_[['start_Tmin']],
Asym = defaults_[['start_rmax']],
sigma_error = defaults_[['start_sigma_error']])
lower <- c(lower_Asym = defaults_[['lower_rmax']], lower_cOpt = defaults_[['lower_Topt']], lower_c0 = defaults_[['lower_Tmin']])
upper <- c(upper_Asym = defaults_[['upper_rmax']], upper_cOpt = defaults_[['upper_Topt']], upper_c0 = defaults_[['upper_Tmin']])
df_ <- optimize_(df_, growth_response_bp_linear_l, par, lower = lower, upper = upper, fixed_par, "bplm_")
# Set found Asym <= 0 to NA, this is not a realsitic growth curve
bplm_Asym <- (df_ %>% filter(parameter == "bplm_Asym"))$value
if(is.na(bplm_Asym) | bplm_Asym < 0.001) {
df_$value <- NA
df_$se <- NA
}
return(df_)
}
# 3 parameter asymptotic model
#####################################################
# Base function
growth_response_asym <- function(x, Asym, lrc, c0) {
y <- SSasympOff(x / 30, Asym, lrc, c0 / 30)
y <- if_else(y > 0, y, 0)
return(y)
}
# Wrapper function to process temperature courses
growth_response_asym_l <- function(x_list, x2_list, Asym, lrc, c0) {
y <- lapply(x_list, growth_response_asym, Asym, lrc, c0)
y <- lapply(y, sum) %>% unlist()
return(y)
}
# Fit function
asym_fit <- function(df_, fixed_par) {
par <- c(Asym = defaults_[['start_rmax']],
lrc = defaults_[['asym_start_lrc']],
c0 = defaults_[['start_Tmin']],
sigma_error = defaults_[['start_sigma_error']])
lower <- c(lower_Asym = defaults_[['lower_rmax']], lower_lrc = defaults_[['asym_lower_lrc']],
lower_c0 = defaults_[['lower_Tmin']],
defaults_['lower_sigma_error'])
upper <- c(upper_Asym = defaults_[['upper_rmax']], upper_lrc = defaults_[['asym_upper_lrc']],
upper_c0 = defaults_[['upper_Tmin']],
defaults_['upper_sigma_error'])
df_ <- optimize_(df_, growth_response_asym_l, par, lower = lower, upper = upper, fixed_par, "asym_")
return(df_)
}
## Wang-engel curvilinear model
# Base function with irradiance correction
growth_response_wang_engel <- function(x, Tmin, Topt, Tmax, r) {
temp <- x[[1]]
alpha <- log(2) / log((Tmax - Tmin)/(Topt - Tmin))
beta <- 1
y <- (
( 2*((x - Tmin)^alpha) * (Topt - Tmin)^alpha - (x - Tmin)^(2*alpha)) /
( (Topt - Tmin)^(2*alpha) )
)^beta
y <- if_else(x < Tmin, 0, y)
y <- if_else(x > Tmax, 0, y)
y <- y * r
return(y)
}
# Wrapper function to process temperature courses
growth_response_wang_engel_l <- function(x_list, x2_list, Tmin, Topt, Tmax, r) {
y <- lapply(x_list, growth_response_wang_engel, Tmin, Topt, Tmax, r)
y <- lapply(y, sum) %>% unlist()
return(y)
}
engel_fit <- function(df_, fixed_par) {
par <- c(r = defaults_[['start_rmax']],
Tmin = defaults_[['start_Tmin']],
Topt = defaults_[['start_Topt']],
Tmax = defaults_[['start_Tmax']],
sigma_error = defaults_[['start_sigma_error']])
lower <- c(lower_r = defaults_[['lower_rmax']],
lower_Tmin = defaults_[['lower_Tmin']],
lower_Topt = defaults_[['lower_Topt']],
lower_Tmax = defaults_[['lower_Tmax']],
defaults_['lower_sigma_error'])
upper <- c(upper_r = defaults_[['upper_rmax']],
upper_Tmin = defaults_[['upper_Tmin']],
upper_Topt = defaults_[['upper_Topt']],
upper_Tmax = defaults_[['upper_Tmax']],
defaults_['upper_sigma_error'])
df_ <- optimize_(df_, growth_response_wang_engel_l, par, lower = lower, upper = upper, fixed_par, "engel_")
return(df_)
}
# Transform and plot functions
##############################
params_to_long <- function(model, model_name, fit_method_name = "TCourse") {
params_long <- model %>% unnest(cols = model)
params_long$fit_method <- fit_method_name
params_long$model <- model_name
return(params_long)
}
params_to_wide <- function(params_long) {
params_wide <- params_long %>%
select(-se) %>%
pivot_wider(names_from = "parameter", values_from = "value")
return(params_wide)
}
library(asreml)
# Calculation function
maxiter <- 1500
fit_REML <- function(df, parameter = "", use_weights=T) {
print(parameter)
df_orig <- df
# Set factors
df <- df %>% mutate(genotype = as.factor(genotype.id), year_site.UID = as.factor(year_site.UID))
# Filter out genotypes that exist in only one year-site:
genotype_set <- df %>%
ungroup() %>%
select(genotype, year_site.UID) %>%
unique() %>%
group_by(genotype) %>%
summarize(n = n()) %>%
filter(n > 1) %>%
ungroup() %>%
select(genotype) %>%
unique()
df <- df %>% filter(genotype %in% genotype_set$genotype)
# Remove NA values
df <- df %>% filter(!is.na(BLUE))
# Re-expand to genotype and year-site set to have full design matrix
genotype_set <- expand_grid(year_site.UID = df$year_site.UID %>% unique(),
genotype = df$genotype %>% unique())
df <- left_join(genotype_set, df, by = c("year_site.UID", "genotype"))
# Drop unused (genotype) levels
df <- droplevels(df)
# Arrange df
df <- df %>% arrange(genotype, year_site.UID)
# Set weights
if(use_weights) {
weights_ <- df$weights_vcov
weights_[is.na(df$BLUE)] <- 0
weights_[is.na(weights_)] <- 0
} else {
weights_ <- 1
}
if(nrow(df)== 0) {
print("No data left, skip")
df_BLUE <- data.frame(genotype.id = df_orig$genotype.id %>% unique(), BLUE = NA, AIC_BLUE=NA, BIC_BLUE=NA)
return(df_BLUE)
}
n.rep <- mean(with(df, table(genotype)))
fit_BLUE <- NULL
try({
fit_BLUE <- asreml(BLUE ~ 1 + genotype, data = df, trace = F,
random = ~year_site.UID,
residual = ~genotype:diag(year_site.UID),
weights = weights_,
na.action = list(y = "include", x = "omit"),
maxit = maxiter,
fail="soft")
})
if(is.null(fit_BLUE)) {
print("REDO")
fit_BLUE <- asreml(BLUE ~ 1 + genotype, data = df, trace = F,
random = ~year_site.UID,
residual = ~genotype:id(year_site.UID),
weights = weights_,
na.action = list(y = "include", x = "omit"),
maxit = maxiter)
}
# Extract BLUEs
# BLUEs
g.predBLUE <- NULL
try({
g.predBLUE <- predict(fit_BLUE, classify = "genotype")
df_BLUE <- data.frame(g.predBLUE$pvals[, c(1, 2)])
names(df_BLUE) <- c("genotype.id", "BLUE")
})
if(is.null(g.predBLUE)) {
print("PREDICT failed, using NAs")
df_BLUE <- data.frame(genotype.id = df$genotype.id %>% unique(), BLUE =NA)
}
df_BLUE <- df_BLUE %>% mutate(genotype.id = as.numeric(as.character(genotype.id)))
df_BLUE_u_BLUP <- df_BLUE
df_BLUE_u_BLUP$AIC_BLUE <- summary(fit_BLUE)$aic
df_BLUE_u_BLUP$BIC_BLUE <- summary(fit_BLUE)$bic
return(df_BLUE_u_BLUP)
}
fit_REML_extended <- function(df, parameter = "", m_gen_mat = NULL) {
print(parameter)
# Set factors
df <- df %>% mutate(genotype = as.factor(genotype.id), year_site.UID = as.factor(year_site.UID))
# Filter out genotypes that exist in only one year-site:
genotype_set <- df %>%
ungroup() %>%
select(genotype, year_site.UID) %>%
unique() %>%
group_by(genotype) %>%
summarize(n = n()) %>%
filter(n > 1) %>%
ungroup() %>%
select(genotype) %>%
unique()
df <- df %>% filter(genotype %in% genotype_set$genotype)
# Remove NA values
df <- df %>% filter(!is.na(BLUE))
# Re-expand to genotype and year-site set to have full design matrix
genotype_set <- expand_grid(year_site.UID = df$year_site.UID %>% unique(),
genotype = df$genotype %>% unique())
df <- left_join(genotype_set, df, by = c("year_site.UID", "genotype"))
# If genetic relationship matrix provided: Reduce to genotype set
if (!is.null(m_gen_mat)) {
genotype_set <- tibble(genotype = as.factor(as.character(rownames(m_gen_mat))))
df <- inner_join(genotype_set, df,
by = "genotype")
m_gen_mat_ <- m_gen_mat[as.character(df$genotype %>% unique()),
as.character(df$genotype %>% unique())]
m_gen_mat <- m_gen_mat_
df <- df %>% mutate(genotype = as.factor(genotype))
setdiff(df$genotype %>% unique(), dimnames(m_gen_mat)[[1]])
length(setdiff(df$genotype %>% unique(), dimnames(m_gen_mat)[[1]]))
}
# Drop unused (genotype) levels
df <- droplevels(df)
# Arrange df
df <- df %>% arrange(genotype, year_site.UID)
# Set weights
weights_ <- df$weights_vcov
weights_[is.na(df$BLUE)] <- 0
weights_[is.na(weights_)] <- 0
n.rep <- mean(with(df, table(genotype)))
# Model without relationship matrix
if (is.null(m_gen_mat)) {
# Models for BLUEs and BLUPs
if(n.rep>2) {
fit_BLUP <- asreml(BLUE ~ 1 + year_site.UID, data = df, trace = F,
random = ~genotype,
residual = ~genotype:diag(year_site.UID),
weights = weights_,
na.action = list(y = "include", x = "omit"),
maxit = maxiter)
if(any(as.data.frame(summary(fit_BLUP)$varcomp)['bound'] == 'B')) {
print("Year-site variance estimation for one or more components failed, using id() instead of diag()")
fit_BLUP <- asreml(BLUE ~ 1 + year_site.UID, data = df, trace = F,
random = ~genotype,
residual = ~genotype:id(year_site.UID),
weights = weights_,
na.action = list(y = "include", x = "omit"),
maxit = maxiter)
}
} else {
fit_BLUP <- asreml(BLUE ~ 1 + year_site.UID, data = df, trace = F,
random = ~genotype,
residual = ~genotype:id(year_site.UID),
weights = weights_,
na.action = list(y = "include", x = "omit"),
maxit = maxiter)
}
vc.g <- summary(fit_BLUP)$varcomp['genotype', 'component']
} else {
# Model with relationship matrix
fit_BLUP <- asreml(BLUE ~ 1 + year_site.UID, data = df, trace = F,
random = ~vm(genotype, m_gen_mat),
residual = ~id(genotype):diag(year_site.UID),
weights = weights_,
na.action = list(y = "include", x = "omit"),
maxit = maxiter)
if(any(as.data.frame(summary(fit_BLUP)$varcomp)['bound'] == 'B')) {
print("Year-site variance estimation for one or more components failed, using id() instead of diag()")
fit_BLUP <- asreml(BLUE ~ 1 + year_site.UID, data = df, trace = F,
random = ~vm(genotype, m_gen_mat),
residual = ~id(genotype):id(year_site.UID),
weights = weights_,
na.action = list(y = "include", x = "omit"),
maxit = maxiter)
}
vc.g <- summary(fit_BLUP)$varcomp['vm(genotype, m_gen_mat)', 'component']
}
if(n.rep>2) {
fit_BLUE <- asreml(BLUE ~ 1 + genotype, data = df, trace = F,
random = ~year_site.UID,
residual = ~genotype:diag(year_site.UID),
weights = weights_,
na.action = list(y = "include", x = "omit"),
maxit = maxiter)
} else {
fit_BLUE <- asreml(BLUE ~ 1 + genotype, data = df, trace = F,
random = ~year_site.UID,
residual = ~genotype:id(year_site.UID),
weights = weights_,
na.action = list(y = "include", x = "omit"),
maxit = maxiter)
}
# Mean variance of a difference of two genotypic BLUPs
# obtain squared s.e.d. matrix
vdBLUP.mat <- predict(fit_BLUP, classify = "genotype", only = "genotype", sed = TRUE)$sed^2
# take mean of upper triangle
vdBLUP.avg <- mean(vdBLUP.mat[upper.tri(vdBLUP.mat, diag = FALSE)])
#############
# H2 Cullis #
#############
H2Cullis