Commit dac1cd47 authored by luroth's avatar luroth
Browse files

van Eeuwjick et al processing added

parent eb0eb17e
library(scam) library(scam)
`vcov.scam` <- function (object, freq = FALSE, dispersion = NULL,
parametrized = TRUE, ...) {
if (freq) {
vc <- if (parametrized) {
object$Ve.t
} else {
object$Ve
}
} else {
vc <- if (parametrized) {
object$Vp.t
} else {
object$Vp
}
}
if (!is.null(dispersion)) {
vc <- dispersion * vc/object$sig2
}
name <- names(object$edf)
dimnames(vc) <- list(name, name)
vc
}
`coef.scam` <- function(object, parametrized = TRUE, ...) {
coefs <- if (parametrized) {
object$coefficients.t
} else {
object$coefficients
}
coefs
}
fit_scam_spline <- function(x, y, k = NA, bs = "mpi", label = NULL, optimizer = "bfgs") { fit_scam_spline <- function(x, y, k = NA, bs = "mpi", label = NULL, optimizer = "bfgs") {
if (!is.null(label)) { print(label) } if (!is.null(label)) { print(label) }
if (is.na(k)) k <- round(length(x) * 3 / 4) if (is.na(k)) k <- round(length(x) * 3 / 4)
...@@ -16,6 +48,23 @@ fit_scam_spline <- function(x, y, k = NA, bs = "mpi", label = NULL, optimizer = ...@@ -16,6 +48,23 @@ fit_scam_spline <- function(x, y, k = NA, bs = "mpi", label = NULL, optimizer =
return(spline) return(spline)
} }
fit_scam_spline_weights <- function(x, y, w, k = NA, bs = "mpi", label = NULL, optimizer = "bfgs") {
w <- 1 / (w^2)
if (!is.null(label)) { print(label) }
if (is.na(k)) k <- round(length(x) * 3 / 4)
if (k > 20) k <- 20
spline <- NULL
try(
system.time(spline <- scam(y ~ s(as.numeric(x), k = k, bs = bs), optimizer = optimizer, weights=w))
)
if (is.null(spline)) {
print("decreasing k")
spline <- scam(y ~ s(as.numeric(x), k = k - 1, bs = bs), weights=w)
}
return(spline)
}
predict_scam_spline <- function(spline, x_, deriv = NULL, se = FALSE) predict_scam_spline <- function(spline, x_, deriv = NULL, se = FALSE)
{ {
...@@ -37,9 +86,40 @@ predict_scam_spline <- function(spline, x_, deriv = NULL, se = FALSE) ...@@ -37,9 +86,40 @@ predict_scam_spline <- function(spline, x_, deriv = NULL, se = FALSE)
return(predicts) return(predicts)
} }
predict_scam_spline_posteriors <- function(spline, x_)
{
# Design matrix
lp <- predict.scam(spline, newdata = x_, type = "lpmatrix")
# Estimated coefficients
coef <- coef.scam(spline)
# install.packages("gratia")
vc <- vcov.scam(spline)
if (!all( eigen(vc)$values >0 )) {
predicts <- predict.scam(spline, newdata = x_, se.fit = FALSE)
return(list(predicts))
}
# Sample from the distrubitions of the coefficients
set.seed(35)
sim <- MASS::mvrnorm(1000, mu = c(coef), Sigma = unname(vc))
# For each realisation, obtain the "fitted" curve
predicts_ <- lp %*% t(sim)
predicts <- lapply(seq_len(ncol(predicts_)), function(i) predicts_[,i])
predicts_r <- sapply (predicts, function (x) {length (x) <- nrow(predicts_); return (x)})
predicts_rr <- lapply(seq_len(nrow(predicts_r)), function(i) predicts_r[i,])
return(predicts_rr)
}
# Finds start/stop of growth phase, final value and key percentiles # Finds start/stop of growth phase, final value and key percentiles
find_start_stop_growth_phase <- function(df, text = NA, threshold_start = 1 / 4, threshold_stop = 1 / 4, delta_days = 40, final_height_agg = 24) { find_start_stop_growth_phase <- function(df, text = NA, threshold_start = 1 / 4, threshold_stop = 1 / 4, delta_days = 40, final_height_agg = 24) {
if (!is.na(text)) print(text) if (!is.na(text)) print(text)
# Extract maximum growth phase # Extract maximum growth phase
max_growth <- max(df$predict_deriv) max_growth <- max(df$predict_deriv)
if (max_growth == 0) { if (max_growth == 0) {
...@@ -133,3 +213,26 @@ find_start_stop_growth_phase <- function(df, text = NA, threshold_start = 1 / 4, ...@@ -133,3 +213,26 @@ find_start_stop_growth_phase <- function(df, text = NA, threshold_start = 1 / 4,
predict_p95 = p95_final_value, predict_p95 = p95_final_value,
predict_p95_se = p95_final_value_se)) predict_p95_se = p95_final_value_se))
} }
find_start_stop_growth_phase_posterior <- function(df, text = NA, threshold_start = 1 / 4, threshold_stop = 1 / 4, delta_days = 40, final_height_agg = 10) {
f <- function(pred) {return(c(0, diff(pred)))}
predict <- find_start_stop_growth_phase(df, text, threshold_start, threshold_stop, delta_days, final_height_agg)
df_posterior_1 <- sapply (df$predict_posteriors, function (x) {length (x) <- length(df$predict_posteriors[[1]]); return (x)})
df_posterior_1 <- lapply(seq_len(nrow(df_posterior_1)), function(i) df_posterior_1[i,])
df_posterior_2 <- lapply(df_posterior_1, f)
df_posterior <- mapply(function(X, Y) {tibble(predict = X, predict_deriv=Y, timestamp = df$timestamp, predict_se = df$predict_se)}, X=df_posterior_1, Y=df_posterior_2, SIMPLIFY = F)
predict_posterior <- lapply(df_posterior, find_start_stop_growth_phase, NA, threshold_start, threshold_stop, delta_days, final_height_agg)
df_predict_posterior <- bind_rows(predict_posterior)
df_predict_posterior_se <- df_predict_posterior %>% dplyr::select(-ends_with("se")) %>%
summarise_each(funs(mean,sd,sepost=sd(.)/sqrt(n()))) %>%
dplyr::select(ends_with("sepost"))
predict <- bind_cols(predict, df_predict_posterior_se)
return(predict)
}
\ No newline at end of file
...@@ -27,8 +27,8 @@ source("R/Model/Dose_response.R") ...@@ -27,8 +27,8 @@ source("R/Model/Dose_response.R")
source("R/Model/FitSpATS.R") source("R/Model/FitSpATS.R")
source("R/Model/Graphs.R") source("R/Model/Graphs.R")
start_run <- 11 start_run <- 1
max_runs <- 500 max_runs <- 1
number_of_cpus <- 35 number_of_cpus <- 35
sigma_error <- 10 sigma_error <- 10
......
This diff is collapsed.
This diff is collapsed.
# Working directory with temperature data # Working directory with temperature data
path_home <- 'C:/Users/luroth/PycharmProjects/htfp_wheat_canopy_height_processing' path_home <- 'E:/Scripts/htfp_data_processing'
path_simulation <- 'E:/Simulation/Runs' path_simulation <- 'E:/Simulation/Runs'
setwd(path_home) setwd(path_home)
...@@ -151,6 +151,15 @@ for(run in start_run:max_runs) { ...@@ -151,6 +151,15 @@ for(run in start_run:max_runs) {
mutate(BLUEs = map(data, fit_SpATS, paste(model, parameter), use_weights =TRUE, use_checks=TRUE)) %>% mutate(BLUEs = map(data, fit_SpATS, paste(model, parameter), use_weights =TRUE, use_checks=TRUE)) %>%
collect() collect()
df_params_yearsite_weightedpost <- df_all_params %>%
mutate(year_site.UID_ = year_site.UID) %>%
mutate(se = sepost) %>%
group_by(year_site.UID_, parameter, model) %>%
nest() %>%
partition(cluster) %>%
mutate(BLUEs = map(data, fit_SpATS, paste(model, parameter), use_weights =TRUE, use_checks=TRUE)) %>%
collect()
df_params_yearsite_not_weighted <- df_all_params %>% df_params_yearsite_not_weighted <- df_all_params %>%
mutate(year_site.UID_ = year_site.UID) %>% mutate(year_site.UID_ = year_site.UID) %>%
group_by(year_site.UID_, parameter, model) %>% group_by(year_site.UID_, parameter, model) %>%
...@@ -162,11 +171,14 @@ for(run in start_run:max_runs) { ...@@ -162,11 +171,14 @@ for(run in start_run:max_runs) {
df_params_BLUES_yearsite_weighted <- df_params_yearsite_weighted %>% unnest(BLUEs) df_params_BLUES_yearsite_weighted <- df_params_yearsite_weighted %>% unnest(BLUEs)
df_params_BLUES_yearsite_weighted <- df_params_BLUES_yearsite_weighted %>% select(-data) df_params_BLUES_yearsite_weighted <- df_params_BLUES_yearsite_weighted %>% select(-data)
df_params_BLUES_yearsite_weighted$stage1 <- "weighted" df_params_BLUES_yearsite_weighted$stage1 <- "weighted"
df_params_BLUES_yearsite_weightedpost <- df_params_yearsite_weightedpost %>% unnest(BLUEs)
df_params_BLUES_yearsite_weightedpost <- df_params_BLUES_yearsite_weightedpost %>% select(-data)
df_params_BLUES_yearsite_weightedpost$stage1 <- "weighted (posterior simulation)"
df_params_BLUES_yearsite_not_weighted <- df_params_yearsite_not_weighted %>% unnest(BLUEs) df_params_BLUES_yearsite_not_weighted <- df_params_yearsite_not_weighted %>% unnest(BLUEs)
df_params_BLUES_yearsite_not_weighted <- df_params_BLUES_yearsite_not_weighted %>% select(-data) df_params_BLUES_yearsite_not_weighted <- df_params_BLUES_yearsite_not_weighted %>% select(-data)
df_params_BLUES_yearsite_not_weighted$stage1 <- "not weighted" df_params_BLUES_yearsite_not_weighted$stage1 <- "not weighted"
df_params_BLUES_yearsite <- bind_rows(df_params_BLUES_yearsite_weighted, df_params_BLUES_yearsite_not_weighted) df_params_BLUES_yearsite <- bind_rows(df_params_BLUES_yearsite_weighted, df_params_BLUES_yearsite_not_weighted, df_params_BLUES_yearsite_weightedpost)
#---------------------------------------------------------------- #----------------------------------------------------------------
......
# Working directory with temperature data # Working directory with temperature data
path_home <- 'C:/Users/luroth/PycharmProjects/htfp_wheat_canopy_height_processing' path_home <- 'E:/Scripts/htfp_data_processing'
path_simulation <- 'E:/Simulation/Runs' path_simulation <- 'E:/Simulation/Runs'
setwd(path_home) setwd(path_home)
......
# Working directory with temperature data
path_home <- 'E:/Scripts/htfp_data_processing'
path_simulation <- 'E:/Simulation/Runs'
setwd(path_home)
# Libraries to use
library(readr)
library(tidyr)
library(purrr)
library(ggplot2)
library(lubridate)
library(gridExtra)
library(plyr)
library(stringr)
library(asreml)
library(dplyr)
source("R/Model/FitREML.R")
source("R/Model/Graphs.R")
start_run <- 1
max_runs <- 500
#500
number_of_cpus <- 5
sigma_error <- 10
# Plot control overview
plot_ids <- c(
"FIP20150006",
"FIP20160002",
"FIP20170009",
"FIP20180013",
"FIP20190003")
## Measurement frequencies
measurement_dates_freq_1d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 1), "%m%d")
)
measurement_dates_freq_2d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 2), "%m%d")
)
measurement_dates_freq_3d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 3), "%m%d")
)
measurement_dates_freq_4d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 4), "%m%d")
)
measurement_dates_freq_5d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 5), "%m%d")
)
measurement_dates_freq_6d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 6), "%m%d")
)
measurement_dates_freq_7d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 7), "%m%d")
)
measurement_dates_freq_9d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 9), "%m%d")
)
measurement_dates_freq_11d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 11), "%m%d")
)
measurement_dates_freq_13d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 13), "%m%d")
)
measurement_dates_sets <- list(
"3 d" = measurement_dates_freq_3d)
# Read data
df_designs <- read_csv('Simulation/designs.csv')
df_designs <- df_designs %>% mutate(plot.discrete_x = plot.row * if_else(plot.replication > 1, 2, 1),
plot.discrete_y = plot.range * if_else(plot.replication > 1, 2, 1))
df_genotypes <- read_csv('Simulation/genotypes.csv')
df_temp <- read_csv('Simulation/covariate_temp.csv')
run <- 19
for(run in start_run:max_runs) {
#try({
# Validation data
df_genotypes_true <- read_csv(paste0(path_simulation, "/", run, "/genotypes_params.csv"))
df_genotypes_true <- df_genotypes_true %>% mutate(
bplm_slope = bplm_Asym / (bplm_cOpt- bplm_c0))
df_genotypes_true <- df_genotypes_true %>%
pivot_longer(c(start_growth, stop_growth, bplm_c0, bplm_cOpt, bplm_Asym, bplm_slope), names_to = "parameter")
print(paste0("Run ", run))
i <- length(measurement_dates_sets)
for(i in length(measurement_dates_sets):1) {
measurement_dates_set_name = names(measurement_dates_sets)[[i]]
measurement_dates_set = measurement_dates_sets[[i]]
set <- paste0("set", str_replace_all(measurement_dates_set_name, " ", ""))
print(paste0("Set: ", measurement_dates_set_name))
files <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "*_genotype_true_versus_predict_onestage.csv"))
if(TRUE | length(files)==0) {
df_params_BLUES_yearsite <- read_csv(paste0(path_simulation, "/", run, "/", set, "_year_site_BLUE_predict.csv"))
df_params_BLUES_yearsite <- df_params_BLUES_yearsite %>% filter(model %in% c("spline"), parameter != "final_height")
df_params_BLUES_yearsite$weights_vcov <- 1/(df_params_BLUES_yearsite$BLUE_SE^2)
print("Fit stage 2 (asreml-R)")
df_BLUEs_weights <- df_params_BLUES_yearsite %>% filter(!is.na(use_weights)) %>%
mutate(year_site.UID = year_site.UID_) %>% group_by(parameter, model, stage1) %>%
nest() %>% mutate(BLUEs = map(data, fit_REML, parameter = paste(parameter, model), use_weights=T)) %>%
select(-data) %>% unnest(BLUEs) %>% unnest(BLUE)
df_BLUEs_weights$stage2 <- "weighted"
df_BLUEs_no_weights <- df_params_BLUES_yearsite %>% filter(!is.na(use_weights)) %>%
mutate(year_site.UID = year_site.UID_) %>% group_by(parameter, model, stage1) %>%
nest() %>% mutate(BLUEs = map(data, fit_REML, parameter = paste(parameter, model), use_weights=F)) %>%
select(-data) %>% unnest(BLUEs) %>% unnest(BLUE)
df_BLUEs_no_weights$stage2 <- "not weighted"
df_BLUES <- bind_rows(df_BLUEs_weights, df_BLUEs_no_weights)
df_BLUES <- df_BLUES %>% rename(predict = BLUE)
# Validate with true values
df_BLUES_validate <- inner_join(df_genotypes_true, df_BLUES, by=c("genotype.id", "parameter"))
df_BLUES_validate <- df_BLUES_validate %>% mutate(label = paste(parameter, model, stage1, stage2, sep="_"))
# plot <- ggplot(data = df_BLUES_validate, aes(value, predict)) +
# geom_point() +
# geom_abline(intercept = 0, slope=1) +
# facet_wrap(~label, scales ="free") +
# ggtitle(paste("Run:", run, set))
# plot(plot)
df_BLUES_validate$set <- set
write_csv(df_BLUES_validate, paste0(path_simulation, "/", run, "/", set, "_reversegenotype_true_versus_predict.csv"))
} else {
print("Results present, skip")
}
}
#})
}
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment