Commit e97e698b authored by luroth's avatar luroth
Browse files

preparation for publication: minimal example added, code cleanup

parent 67edcead
......@@ -30,7 +30,7 @@ sigma_error <- 10
# Plot for control overviews
plot_ids <- c(
"FIP20160002",
"FIP20170009",
"FIP20170143",
"FIP20180013")
## Measurement frequencies
......
# Working directory with temperature data
path_home <- 'C:/Users/luroth/PycharmProjects/htfp_wheat_canopy_height_processing'
path_home <- 'C:/Users/luroth/PycharmProjects/htfp_data_processing'
path_simulation <- 'E:/Simulation/Runs'
setwd(path_home)
......
# Working directory with temperature data
path_home <- 'C:/Users/luroth/PycharmProjects/htfp_wheat_canopy_height_processing'
path_simulation <- 'E:/Simulation/Runs'
setwd(path_home)
# Libraries to use
library(readr)
library(tidyr)
library(purrr)
library(ggplot2)
library(lubridate)
library(gridExtra)
library(broom)
library(zoo)
library(plyr)
library(stringr)
library(SpATS)
library(foreach)
library(dplyr)
source("R/Model/Spline_QMER.R")
source("R/Model/Dose_response.R")
#source("R/Model/FitREML.R")
source("R/Model/FitSpATS.R")
source("R/Model/Graphs.R")
start_run <- 1
max_runs <- 1
number_of_cpus <- 35
sigma_error <- 10
# Plot control overview
plot_ids <- c(
"FIP20150006",
"FIP20160002",
"FIP20170009",
"FIP20180013",
"FIP20190003")
## Measurement frequencies
measurement_dates_freq_3d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 3), "%m%d")
)
measurement_dates_sets <- list(
"3 d" = measurement_dates_freq_3d
)
# Read data
df_designs <- read_csv('Simulation/designs.csv')
df_designs <- df_designs %>% mutate(plot.discrete_x = plot.row + if_else(plot.replication > 1, 25, 1),
plot.discrete_y = plot.range + if_else(plot.replication > 1, 22, 1))
df_genotypes <- read_csv('Simulation/genotypes.csv')
df_temp <- read_csv('Simulation/covariate_temp.csv')
#run <- 1
#for(run in start_run:max_runs) {
cl <- parallel::makeCluster(number_of_cpus)
doParallel::registerDoParallel(cl)
foreach(run = start_run:max_runs, .verbose = TRUE,
.packages = c("readr", "tidyr", "purrr", "ggplot2", "lubridate", "gridExtra", "plyr", "stringr", "SpATS", "dplyr", "scam")
) %dopar% {
# Validation data
df_genotypes_yearsite_true <- read_csv(paste0(path_simulation, "/", run, "/genotype_yearsite_params.csv"))
df_genotypes_yearsite_true <- df_genotypes_yearsite_true %>% mutate(
bplm_slope = bplm_Asym / (bplm_cOpt- bplm_c0))
df_genotypes_yearsite_true <- df_genotypes_yearsite_true %>%
pivot_longer(c(start_growth, stop_growth, bplm_c0, bplm_cOpt, bplm_Asym, final_height, bplm_slope), names_to = "parameter")
df_genotypes_true <- read_csv(paste0(path_simulation, "/", run, "/genotypes_params.csv"))
df_genotypes_true <- df_genotypes_true %>%
pivot_longer(c(start_growth, stop_growth, bplm_c0, bplm_cOpt, bplm_Asym), names_to = "parameter")
print(paste0("Run ", run))
df_trait_values <- read_csv(paste0(path_simulation, "/", run, "/trait_values.csv"), col_types = cols(
plot.UID = col_character(),
method.id = col_double(),
method.name = col_character(),
trait.id = col_double(),
trait.name = col_character(),
trait.label = col_character(),
responsible = col_character(),
timestamp = col_datetime(format = ""),
value = col_double()))
### Merge design and plot information
df_values_for_fit_orig <- inner_join(inner_join(df_trait_values, df_designs, by="plot.UID"), df_genotypes, by="genotype.id")
i <- length(measurement_dates_sets)
i <- 1
for(i in length(measurement_dates_sets):1) {
measurement_dates_set_name = names(measurement_dates_sets)[[i]]
measurement_dates_set = measurement_dates_sets[[i]]
set <- paste0("set", str_replace_all(measurement_dates_set_name, " ", ""))
print(paste0("Set: ", measurement_dates_set_name))
files <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "_plot_true_versus_predict_mod2.csv"))
if(length(files)==0) {
#try({
df_values_for_fit <- df_values_for_fit_orig %>%
filter(format(timestamp, "%m%d") %in% measurement_dates_set)
# First stage BLUEs calculation
df_primary_trait_BLUEs <- df_values_for_fit %>%
mutate(year_site.UID_ = year_site.UID) %>%
group_by(year_site.UID_, timestamp) %>%
nest() %>%
partition(cluster) %>%
mutate(BLUEs = map(data, fit_SpATS, paste(model, parameter), use_weights =FALSE, use_checks=TRUE)) %>%
collect()
df_primary_trait_BLUEs <- df_primary_trait_BLUEs %>% unnest(BLUEs) %>% unnest(BLUE)
df_primary_trait_BLUEs
# Add timepoint of preliminar measurement and value delta (growth) to each timepoint
df_values_for_fit <- df_values_for_fit %>% group_by(plot.UID) %>%
arrange(timestamp) %>% mutate(lag_timestamp = lag(timestamp), value_delta = value - lag(value))
# Remove first measurements, they do not suite as delta measurement
df_values_for_fit <- df_values_for_fit %>% filter(!is.na(lag_timestamp))
# Calcualte growth rate
df_values_for_fit <- df_values_for_fit %>% group_by(plot.UID) %>%
mutate(time_diff = difftime(timestamp, lag_timestamp, units = "hours"))
###############################################
### Regain parameters from simulated data
df_values_original <- df_values_for_fit
### Fit p-splines
print("Fit spline")
df_spline_model <- df_values_for_fit %>%
ungroup() %>%
group_by(plot.UID, year_site.UID) %>%
nest() %>%
mutate(spline_model = map(data,
~fit_scam_spline(.$timestamp, .$value)))
df_spline_model <- df_spline_model %>% select(-data)
# Predict value with spline model
time_interval <- 60*60*12
prediction_timepoints <- df_values_for_fit %>%
group_by(year_site.UID) %>%
filter(timestamp > min(timestamp) & timestamp < max(timestamp)) %>%
summarize(prediction_timepoint = list(seq(round_any(min(timestamp), time_interval, f= ceiling),
round_any(max(timestamp) -days(5), time_interval, f= floor),
time_interval)))
predictions <- inner_join(df_spline_model, prediction_timepoints, by="year_site.UID")
df_spline_predicts <- predictions %>%
ungroup() %>%
group_by(plot.UID) %>%
do(spline_predicts = tibble(
predict = predict_scam_spline(.$spline_model[[1]], list(x=.$prediction_timepoint[[1]])),
predict_se = predict_scam_spline(.$spline_model[[1]], list(x=.$prediction_timepoint[[1]]), se = T),
predict_deriv = predict_scam_spline(.$spline_model[[1]], list(x=.$prediction_timepoint[[1]]), deriv = T),
timestamp = .$prediction_timepoint[[1]]))
# Find start/stop and maximum height
print("Find start/stop")
df_growth_phase_predicts <- df_spline_predicts %>% ungroup() %>% group_by(plot.UID) %>%
mutate(growth_phase_params = map(spline_predicts, find_start_stop_growth_phase))
# Extract predicted values
df_growth_phase_predicts <- df_growth_phase_predicts %>%
unnest(growth_phase_params)
measurements <- df_values_for_fit %>% filter(plot.UID %in% plot_ids)
predicts <- df_growth_phase_predicts %>% filter(plot.UID %in% plot_ids) %>% unnest(spline_predicts)
predicts <- inner_join(predicts, df_designs %>% select(plot.UID, genotype.id), by="plot.UID")
predicts <- inner_join(predicts, df_genotypes, by="genotype.id")
# Add predicted parameters to plots to perform two-stage processing
df_growth_phase_predicts_ <- df_growth_phase_predicts %>%
mutate(
predict_final_height = predict_final_value,
predict_final_height_se = predict_final_value_se,
predict_start_growth = yday(predict_start_growth),
predict_stop_growth = yday(predict_stop_growth)) %>%
select(plot.UID,
predict_final_height, predict_final_height_se,
predict_start_growth, predict_start_growth_se,
predict_stop_growth, predict_stop_growth_se)
df_growth_phase_predicts_values <- df_growth_phase_predicts_ %>%
select(plot.UID, predict_start_growth, predict_stop_growth, predict_final_height) %>%
pivot_longer(
cols= c(predict_final_height, predict_start_growth, predict_stop_growth), names_to = "parameter", values_to = "predict")
df_growth_phase_predicts_se <- df_growth_phase_predicts_ %>%
select(plot.UID, predict_final_height_se,
predict_start_growth_se, predict_stop_growth_se) %>%
pivot_longer(
cols= c(
predict_start_growth_se, predict_stop_growth_se, predict_final_height_se), names_to = "parameter", values_to = "se") %>%
mutate(parameter = str_sub(parameter, 1, -4))
df_growth_phase_predicts_ <- inner_join(df_growth_phase_predicts_values, df_growth_phase_predicts_se, by=c("plot.UID", "parameter"))
# Comparison with true values
df_genotype_predicts <- df_growth_phase_predicts_ %>% ungroup() %>%
select(plot.UID, parameter, predict, se) %>%
mutate(parameter = str_remove(parameter, "predict_")) %>%
inner_join(df_genotypes_yearsite_true, by=c("plot.UID", "parameter"))
## Percentile predictions
# Add predicted parameters to plots to perform two-stage processing
df_growth_phase_predicts_ <- df_growth_phase_predicts %>%
mutate(
predict_p15 = yday(predict_p15),
predict_p95 = yday(predict_p95)) %>%
select(plot.UID,
predict_p15, predict_p15_se,
predict_p95, predict_p95_se)
df_growth_phase_predicts_values <- df_growth_phase_predicts_ %>%
select(plot.UID, predict_p15, predict_p95) %>%
pivot_longer(
cols= c(predict_p15, predict_p95), names_to = "parameter", values_to = "predict")
df_growth_phase_predicts_se <- df_growth_phase_predicts_ %>%
select(plot.UID,
predict_p15_se, predict_p95_se) %>%
pivot_longer(
cols= c(
predict_p15_se, predict_p95_se), names_to = "parameter", values_to = "se") %>%
mutate(parameter = str_sub(parameter, 1, -4))
df_growth_phase_predicts_ <- inner_join(df_growth_phase_predicts_values, df_growth_phase_predicts_se, by=c("plot.UID", "parameter"))
# Comparison with true values
df_genotype_predicts_percentile <- df_growth_phase_predicts_ %>% ungroup() %>%
select(plot.UID, parameter, predict, se) %>%
mutate(parameter = str_remove(parameter, "predict_")) %>%
mutate(parameter = case_when(
parameter == "p15" ~ "start_growth",
parameter == "p95" ~ "stop_growth",
TRUE ~"unknown"
)) %>%
inner_join(df_genotypes_yearsite_true, by=c("plot.UID", "parameter"))
########################################
## Fit growth response curves
print("Fit growth curves")
# Filter measurement data for growth period
df_growth_fit <- inner_join(df_values_original, df_growth_phase_predicts, by="plot.UID")
# Filter for growth period
df_growth_fit <- df_growth_fit %>%
filter(timestamp >= predict_start_growth) %>%
filter(timestamp <= predict_stop_growth)
## Add temperature course to PH measurements
# Generate temperature course lookup table
df_covar_course_lookup <- df_growth_fit %>% ungroup() %>%
select(timestamp, lag_timestamp) %>% unique() %>%
mutate(temp_course_id = seq(n()))
df_growth_fit <- inner_join(df_growth_fit, df_covar_course_lookup, by=c("timestamp", "lag_timestamp"))
# Fill with values
extract_covar <- function(df, from, to) {
df_ <- df %>% filter(timestamp > from, timestamp <= to)
return(df_)
}
df_temp_course_lookup <- df_covar_course_lookup %>% group_by(temp_course_id) %>%
nest() %>%
mutate(temperature_course =
map(data, ~ extract_covar(df_temp, .$lag_timestamp, .$timestamp))) %>%
unnest(data) %>%
select(-timestamp, -lag_timestamp)
df_temp_course_lookup <- df_temp_course_lookup %>%
group_by(temp_course_id) %>%
mutate(mean_temp = map_dbl(temperature_course, ~mean(.$value)))
# Join temperature course to PH measurements
df_growth_fit <- inner_join(df_growth_fit, df_temp_course_lookup, by="temp_course_id")
remove(df_temp_course_lookup)
df_growth_fit <- df_growth_fit %>% mutate(temps = map(temperature_course, function (df) df$value))
# Calcualte growth rate
df_growth_fit_ <- df_growth_fit %>%
group_by(plot.UID) %>%
mutate(time_diff = difftime(timestamp, lag_timestamp, units = "hours"))
df_growth_fit_ <- df_growth_fit_ %>% mutate(value_rate = value_delta / as.numeric(time_diff))
df_growth_fit_ <- df_growth_fit_ %>% mutate(value_delta = value_rate, temps = mean_temp)
# Simple lm
models_simple_lm <- df_growth_fit_ %>%
group_by(year_site.UID, plot.UID, method.id, trait.label) %>%
nest() %>%
mutate(model = map(data, ~lm(value_rate ~ mean_temp + 1, data = .)))
models_simple_lm <- models_simple_lm %>%
mutate(coefs = map(model, ~coef(.)),
coefs_se = map(model, ~coef(summary(.))[, "Std. Error"]))
# Extract parameters
params_simple_lm <- models_simple_lm %>%
unnest_wider(coefs) %>%
rename(lm_intercept = "(Intercept)", lm_slope = mean_temp) %>%
unnest_wider(coefs_se) %>%
rename(lm_intercept_se = "(Intercept)", lm_slope_se = mean_temp) %>%
mutate(lm_intercept = - lm_intercept / lm_slope)
# Convert to long format
params_simple_lm_long <- params_simple_lm %>%
pivot_longer(c(lm_intercept, lm_slope), names_to = "parameter", values_to = "value") %>%
select(-data) %>%
mutate(se = if_else(parameter == "lm_intercept", lm_intercept_se, lm_slope_se)) %>%
select(-lm_intercept_se, -lm_slope_se)
## Simple bplm
print("Simple bplm")
models_simple_bplm <- df_growth_fit_ %>%
group_by(year_site.UID, plot.UID) %>%
do(model = bplm_fit(., fixed_par = c(sigma_error = sigma_error)))
params_simple_bplm_long <- models_simple_bplm %>% unnest(cols = c(model))
print("Simple asym")
models_simple_asym <- df_growth_fit_ %>%
group_by(year_site.UID, plot.UID) %>%
do(model = asym_fit(., fixed_par = c(sigma_error = sigma_error)))
params_simple_asym_long <- models_simple_asym %>% unnest(cols = c(model))
# Fit asym
print("Course asym")
models_course_asym <- df_growth_fit %>%
group_by(year_site.UID, plot.UID) %>%
do(model = asym_fit(., fixed_par = c(sigma_error = sigma_error)))
params_course_asym_long <- models_course_asym %>% unnest(cols = c(model))
# Fit bplm
print("Course bplm")
models_course_bplm <- df_growth_fit %>%
group_by(year_site.UID, plot.UID) %>%
do(model = bplm_fit(., fixed_par = c(sigma_error = sigma_error)))
params_course_bplm_long <- models_course_bplm %>% unnest(cols = c(model))
# Comparison with true values
df_genotype_predicts_simple <- params_simple_bplm_long %>% ungroup() %>%
select(plot.UID, parameter, value, se) %>%
rename(predict = value) %>%
inner_join(df_genotypes_yearsite_true)
df_genotype_predicts_asym_simple <- params_simple_asym_long %>% ungroup() %>%
select(plot.UID, parameter, value, se) %>%
mutate(parameter = case_when(
parameter == "asym_c0" ~ "bplm_c0",
parameter == "asym_Asym" ~ "bplm_Asym",
TRUE ~ "bplm_slope"
)) %>%
rename(predict = value) %>%
inner_join(df_genotypes_yearsite_true)
df_genotype_predicts_course <- params_course_bplm_long %>% ungroup() %>%
select(plot.UID, parameter, value, se) %>%
rename(predict = value) %>%
inner_join(df_genotypes_yearsite_true)
df_genotype_predicts_asym_course <- params_course_asym_long %>% ungroup() %>%
select(plot.UID, parameter, value, se) %>%
mutate(parameter = case_when(
parameter == "asym_c0" ~ "bplm_c0",
parameter == "asym_Asym" ~ "bplm_Asym",
TRUE ~ "bplm_slope"
)) %>%
rename(predict = value) %>%
inner_join(df_genotypes_yearsite_true)
df_genotype_predicts_lm_simple <- params_simple_lm_long %>% ungroup() %>%
select(plot.UID, parameter, value, se) %>%
mutate(parameter = case_when(
parameter == "lm_intercept" ~ "bplm_c0",
TRUE ~ "bplm_slope"
)) %>%
rename(predict = value) %>%
inner_join(df_genotypes_yearsite_true)
df_genotype_predicts_lm_simple$model <- "lm_simple"
df_genotype_predicts_asym_simple$model <- "asym_simple"
df_genotype_predicts_simple$model <- "bplm_simple"
df_genotype_predicts_course$model <- "bplm_course"
df_genotype_predicts_asym_course$model <- "asym_course"
df_genotype_predicts$model <- "spline"
df_genotype_predicts_percentile$model <- "percentile"
df_all_params <- bind_rows(df_genotype_predicts_lm_simple, df_genotype_predicts_asym_simple,
df_genotype_predicts_simple, df_genotype_predicts_course, df_genotype_predicts_asym_course,
df_genotype_predicts, df_genotype_predicts_percentile)
df_all_params <- inner_join(df_all_params, df_designs, by= c("plot.UID", "year_site.UID", "genotype.id"))
df_all_params <- df_all_params %>% mutate(label = paste(parameter, model, year_site.UID, sep = "_"))
#
# plot <- ggplot(data=df_all_params,
# aes(x=value, y=predict)) +
# geom_point() +
# facet_wrap(~ label, scales="free") +
# ggtitle(paste("Run:", run, set))
# plot(plot)
df_all_params$set <- set
write_csv(df_all_params, paste0(path_simulation, "/", run, "/", set, "_plot_true_versus_predict_mod2.csv"))
} else {
print("Results present, skip")
}
}
}
# Working directory with temperature data
path_home <- './'
path_simulation <- '/home/luroth/Dokumente/Local_workspace/Simulation_Runs'
path_home <- 'C:/Users/luroth/PycharmProjects/htfp_data_processing'
path_simulation <- 'E:/Simulation/Runs'
setwd(path_home)
# Libraries to use
library(readr)
library(tidyr)
......@@ -12,7 +11,6 @@ library(ggplot2)
library(lubridate)
library(gridExtra)
library(broom)
#library(zoo)
library(plyr)
library(stringr)
library(SpATS)
......@@ -24,14 +22,12 @@ library(dplyr)
source("R/Model/Spline_QMER.R")
source("R/Model/Dose_response.R")
#source("R/Model/FitREML.R")
source("R/Model/FitSpATS.R")
source("R/Model/Graphs.R")
# Missing from euler: 1, 3, 8
start_run <- 1
max_runs <- 500
number_of_cpus <- 1
number_of_cpus <- 60
sigma_error <- 10
......@@ -91,10 +87,10 @@ measurement_dates_freq_14d <- c(
measurement_dates_sets <- list(
"3 d" = measurement_dates_freq_3d
# "1 d" = measurement_dates_freq_1d,
# "5 d" = measurement_dates_freq_5d,
# "7 d" = measurement_dates_freq_7d,
# "14 d" = measurement_dates_freq_14d
"1 d" = measurement_dates_freq_1d,
"5 d" = measurement_dates_freq_5d,
"7 d" = measurement_dates_freq_7d,
"14 d" = measurement_dates_freq_14d
)
# Read data
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
# Working directory with temperature data
path_home <- 'C:/Users/luroth/PycharmProjects/htfp_wheat_canopy_height_processing'
path_home <- 'C:/Users/luroth/PycharmProjects/htfp_data_processing'
path_simulation <- 'E:/Simulation/Runs'
setwd(path_home)
......
# Working directory with temperature data
path_home <- 'C:/Users/luroth/PycharmProjects/htfp_wheat_canopy_height_processing'
path_simulation <- 'E:/Simulation/Runs_Posterior'
path_home <- 'C:/Users/luroth/PycharmProjects/htfp_data_processing'
path_simulation <- 'E:/Simulation/Runs'
setwd(path_home)
# Libraries to use
......
# Working directory with temperature data
path_home <- '/home/luroth/PycharmProjects/htfp_data_processing'
path_simulation <- '/home/luroth/public/Evaluation/Projects/KP0012_luroth/9_phenomics_data_processing/Simulation/sim_runs_POWERPC/Runs_ALL'
path_home <- 'C:/Users/luroth/PycharmProjects/htfp_data_processing'
path_simulation <- 'E:/Simulation/Runs'
setwd(path_home)
......
# Working directory with temperature data
path_home <- '/home/luroth/PycharmProjects/htfp_data_processing'
path_simulation <- '/home/luroth/public/Evaluation/Projects/KP0012_luroth/9_phenomics_data_processing/Simulation/sim_runs_POWERPC/Runs_ALL'
path_home <- 'C:/Users/luroth/PycharmProjects/htfp_data_processing'
path_simulation <- 'E:/Simulation/Runs'
setwd(path_home)
......@@ -22,7 +22,7 @@ library(dplyr)
source("R/Model/FitREML.R")
source("R/Model/Graphs.R")
start_run <- 22
start_run <- 1
max_runs <- 500
#500
......
# Working directory with temperature data
path_home <- '/home/luroth/PycharmProjects/htfp_data_processing'
path_simulation <- '/home/luroth/public/Evaluation/Projects/KP0012_luroth/9_phenomics_data_processing/Simulation/sim_runs_POWERPC/Runs_ALL'
path_home <- 'C:/Users/luroth/PycharmProjects/htfp_data_processing'
path_simulation <- 'E:/Simulation/Runs'
setwd(path_home)
......
# Working directory with temperature data
path_home <- '/home/luroth/PycharmProjects/htfp_data_processing'
path_simulation <- '/home/luroth/public/Evaluation/Projects/KP0012_luroth/9_phenomics_data_processing/Simulation/sim_runs_POWERPC/Runs_ALL'
path_home <- 'C:/Users/luroth/PycharmProjects/htfp_data_processing'