Commit 544d353a authored by luroth's avatar luroth
Browse files

preparation for cleanup of code

parent e654b224
...@@ -54,8 +54,8 @@ measurement_dates_sets <- list( ...@@ -54,8 +54,8 @@ measurement_dates_sets <- list(
# Read data # Read data
df_designs <- read_csv('Simulation/designs.csv') df_designs <- read_csv('Simulation/designs.csv')
df_designs <- df_designs %>% mutate(plot.discrete_x = plot.row * if_else(plot.replication > 1, 2, 1), df_designs <- df_designs %>% mutate(plot.discrete_x = plot.row + if_else(plot.replication > 1, 25, 1),
plot.discrete_y = plot.range * if_else(plot.replication > 1, 2, 1)) plot.discrete_y = plot.range + if_else(plot.replication > 1, 22, 1))
df_genotypes <- read_csv('Simulation/genotypes.csv') df_genotypes <- read_csv('Simulation/genotypes.csv')
df_temp <- read_csv('Simulation/covariate_temp.csv') df_temp <- read_csv('Simulation/covariate_temp.csv')
......
...@@ -97,8 +97,8 @@ measurement_dates_sets <- list( ...@@ -97,8 +97,8 @@ measurement_dates_sets <- list(
# Read data # Read data
df_designs <- read_csv('Simulation/designs.csv') df_designs <- read_csv('Simulation/designs.csv')
df_designs <- df_designs %>% mutate(plot.discrete_x = plot.row * if_else(plot.replication > 1, 2, 1), df_designs <- df_designs %>% mutate(plot.discrete_x = plot.row + if_else(plot.replication > 1, 25, 1),
plot.discrete_y = plot.range * if_else(plot.replication > 1, 2, 1)) plot.discrete_y = plot.range + if_else(plot.replication > 1, 22, 1))
df_genotypes <- read_csv('Simulation/genotypes.csv') df_genotypes <- read_csv('Simulation/genotypes.csv')
df_temp <- read_csv('Simulation/covariate_temp.csv') df_temp <- read_csv('Simulation/covariate_temp.csv')
......
...@@ -97,8 +97,8 @@ measurement_dates_sets <- list( ...@@ -97,8 +97,8 @@ measurement_dates_sets <- list(
# Read data # Read data
df_designs <- read_csv('Simulation/designs.csv') df_designs <- read_csv('Simulation/designs.csv')
df_designs <- df_designs %>% mutate(plot.discrete_x = plot.row * if_else(plot.replication > 1, 2, 1), df_designs <- df_designs %>% mutate(plot.discrete_x = plot.row + if_else(plot.replication > 1, 25, 1),
plot.discrete_y = plot.range * if_else(plot.replication > 1, 2, 1)) plot.discrete_y = plot.range + if_else(plot.replication > 1, 22, 1))
df_genotypes <- read_csv('Simulation/genotypes.csv') df_genotypes <- read_csv('Simulation/genotypes.csv')
df_temp <- read_csv('Simulation/covariate_temp.csv') df_temp <- read_csv('Simulation/covariate_temp.csv')
......
...@@ -97,8 +97,8 @@ measurement_dates_sets <- list( ...@@ -97,8 +97,8 @@ measurement_dates_sets <- list(
# Read data # Read data
df_designs <- read_csv('Simulation/designs.csv') df_designs <- read_csv('Simulation/designs.csv')
df_designs <- df_designs %>% mutate(plot.discrete_x = plot.row * if_else(plot.replication > 1, 2, 1), df_designs <- df_designs %>% mutate(plot.discrete_x = plot.row + if_else(plot.replication > 1, 25, 1),
plot.discrete_y = plot.range * if_else(plot.replication > 1, 2, 1)) plot.discrete_y = plot.range + if_else(plot.replication > 1, 22, 1))
df_genotypes <- read_csv('Simulation/genotypes.csv') df_genotypes <- read_csv('Simulation/genotypes.csv')
df_temp <- read_csv('Simulation/covariate_temp.csv') df_temp <- read_csv('Simulation/covariate_temp.csv')
......
# Working directory with temperature data # Working directory with temperature data
path_home <- 'E:/Scripts/htfp_data_processing' path_home <- './'
path_simulation <- 'E:/Simulation/Runs_Posterior' path_simulation <- '/home/luroth/Dokumente/Local_workspace/Simulation_Runs'
setwd(path_home) setwd(path_home)
...@@ -12,7 +12,7 @@ library(ggplot2) ...@@ -12,7 +12,7 @@ library(ggplot2)
library(lubridate) library(lubridate)
library(gridExtra) library(gridExtra)
library(broom) library(broom)
library(zoo) #library(zoo)
library(plyr) library(plyr)
library(stringr) library(stringr)
library(SpATS) library(SpATS)
...@@ -31,7 +31,7 @@ source("R/Model/Graphs.R") ...@@ -31,7 +31,7 @@ source("R/Model/Graphs.R")
# Missing from euler: 1, 3, 8 # Missing from euler: 1, 3, 8
start_run <- 1 start_run <- 1
max_runs <- 500 max_runs <- 500
number_of_cpus <- 29 number_of_cpus <- 1
sigma_error <- 10 sigma_error <- 10
...@@ -99,21 +99,21 @@ measurement_dates_sets <- list( ...@@ -99,21 +99,21 @@ measurement_dates_sets <- list(
# Read data # Read data
df_designs <- read_csv('Simulation/designs.csv') df_designs <- read_csv('Simulation/designs.csv')
df_designs <- df_designs %>% mutate(plot.discrete_x = plot.row * if_else(plot.replication > 1, 2, 1), df_designs <- df_designs %>% mutate(plot.discrete_x = plot.row + if_else(plot.replication > 1, 25, 1),
plot.discrete_y = plot.range * if_else(plot.replication > 1, 2, 1)) plot.discrete_y = plot.range + if_else(plot.replication > 1, 22, 1))
df_genotypes <- read_csv('Simulation/genotypes.csv') df_genotypes <- read_csv('Simulation/genotypes.csv')
df_temp <- read_csv('Simulation/covariate_temp.csv') df_temp <- read_csv('Simulation/covariate_temp.csv')
# run <- 1 run <- 16
# for(run in start_run:max_runs) { for(run in start_run:max_runs) {
cl <- parallel::makeCluster(number_of_cpus) # cl <- parallel::makeCluster(number_of_cpus)
doParallel::registerDoParallel(cl) # doParallel::registerDoParallel(cl)
foreach(run = start_run:max_runs, .verbose = FALSE, # foreach(run = start_run:max_runs, .verbose = FALSE,
.packages = c("fs", "readr", "tidyr", "purrr", "ggplot2", "lubridate", "gridExtra", "plyr", "stringr", "SpATS", "dplyr", "scam", "MASS") # .packages = c("fs", "readr", "tidyr", "purrr", "ggplot2", "lubridate", "gridExtra", "plyr", "stringr", "SpATS", "dplyr", "scam", "MASS")
) %dopar% { # ) %dopar% {
# Validation data # Validation data
...@@ -194,7 +194,7 @@ foreach(run = start_run:max_runs, .verbose = FALSE, ...@@ -194,7 +194,7 @@ foreach(run = start_run:max_runs, .verbose = FALSE,
### Regain parameters from simulated data ### Regain parameters from simulated data
df_values_original <- df_values_for_fit df_values_original <- df_values_for_fit
df_values_for_fit <- df_values_for_fit %>% filter(plot.UID == "FIP20170378")
### Fit p-splines ### Fit p-splines
print("Fit spline") print("Fit spline")
......
# Working directory with temperature data
path_home <- './'
path_simulation <- '/home/luroth/Dokumente/Local_workspace/Simulation_Runs'
setwd(path_home)
# Libraries to use
library(readr)
library(tidyr)
library(purrr)
library(ggplot2)
library(lubridate)
library(gridExtra)
library(broom)
#library(zoo)
library(plyr)
library(stringr)
library(SpATS)
library(fs)
library(foreach)
library(dplyr)
source("R/Model/Spline_QMER.R")
source("R/Model/Dose_response.R")
#source("R/Model/FitREML.R")
source("R/Model/FitSpATS.R")
source("R/Model/Graphs.R")
# Missing from euler: 1, 3, 8
start_run <- 1
max_runs <- 500
number_of_cpus <- 1
sigma_error <- 10
# Plot control overview
plot_ids <- c(
"FIP20150006",
"FIP20160002",
"FIP20170009",
"FIP20180013",
"FIP20190003")
## Measurement frequencies
measurement_dates_freq_1d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 1), "%m%d")
)
measurement_dates_freq_2d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 2), "%m%d")
)
measurement_dates_freq_3d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 3), "%m%d")
)
measurement_dates_freq_4d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 4), "%m%d")
)
measurement_dates_freq_5d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 5), "%m%d")
)
measurement_dates_freq_6d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 6), "%m%d")
)
measurement_dates_freq_7d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 7), "%m%d")
)
measurement_dates_freq_9d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 9), "%m%d")
)
measurement_dates_freq_11d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 11), "%m%d")
)
measurement_dates_freq_14d <- c(
format(seq.Date(from = as.Date("2020-03-01"), as.Date("2020-07-20"), by = 14), "%m%d")
)
measurement_dates_sets <- list(
"3 d" = measurement_dates_freq_3d
# "1 d" = measurement_dates_freq_1d,
# "5 d" = measurement_dates_freq_5d,
# "7 d" = measurement_dates_freq_7d,
# "14 d" = measurement_dates_freq_14d
)
# Read data
df_designs <- read_csv('Simulation/designs.csv')
df_designs <- df_designs %>% mutate(plot.discrete_x = plot.row + if_else(plot.replication > 1, 25, 1),
plot.discrete_y = plot.range + if_else(plot.replication > 1, 22, 1))
df_genotypes <- read_csv('Simulation/genotypes.csv')
df_temp <- read_csv('Simulation/covariate_temp.csv')
run <- 16
#for(run in start_run:max_runs) {
cl <- parallel::makeCluster(number_of_cpus)
doParallel::registerDoParallel(cl)
foreach(run = start_run:max_runs, .verbose = FALSE,
.packages = c("fs", "readr", "tidyr", "purrr", "ggplot2", "lubridate", "gridExtra", "plyr", "stringr", "SpATS", "dplyr", "scam", "MASS")
) %dopar% {
# Validation data
df_genotypes_yearsite_true <- read_csv(paste0(path_simulation, "/", run, "/genotype_yearsite_params.csv"))
df_genotypes_yearsite_true <- df_genotypes_yearsite_true %>% mutate(
bplm_slope = bplm_Asym / (bplm_cOpt- bplm_c0))
df_genotypes_yearsite_true <- df_genotypes_yearsite_true %>%
pivot_longer(c(start_growth, stop_growth, bplm_c0, bplm_cOpt, bplm_Asym, final_height, bplm_slope), names_to = "parameter")
df_genotypes_true <- read_csv(paste0(path_simulation, "/", run, "/genotypes_params.csv"))
df_genotypes_true <- df_genotypes_true %>%
pivot_longer(c(start_growth, stop_growth, bplm_c0, bplm_cOpt, bplm_Asym), names_to = "parameter")
print(paste0("Run ", run))
df_trait_values <- read_csv(paste0(path_simulation, "/", run, "/trait_values.csv"), col_types = cols(
plot.UID = col_character(),
method.id = col_double(),
method.name = col_character(),
trait.id = col_double(),
trait.name = col_character(),
trait.label = col_character(),
responsible = col_character(),
timestamp = col_datetime(format = ""),
value = col_double()))
### Merge design and plot information
df_values_for_fit_orig_ <- inner_join(inner_join(df_trait_values, df_designs, by="plot.UID"), df_genotypes, by="genotype.id")
i <- length(measurement_dates_sets)
i <- 1
for(i in length(measurement_dates_sets):1) {
measurement_dates_set_name = names(measurement_dates_sets)[[i]]
measurement_dates_set = measurement_dates_sets[[i]]
set <- paste0("set", str_replace_all(measurement_dates_set_name, " ", ""))
print(paste0("Set: ", measurement_dates_set_name))
files <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "_year_site_BLUE_predict.csv"))
if(length(files)==0) {
#try({
fs::file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_start_plot.txt"))
df_values_for_fit_orig_ <- df_values_for_fit_orig_ %>%
filter(format(timestamp, "%m%d") %in% measurement_dates_set)
# Initial correction to test van Eeujwick 2018:
df_BLUEs_for_fit_orig_ <- df_values_for_fit_orig_ %>%
mutate(se=1, year_site.UID_ = year_site.UID) %>%
group_by(year_site.UID_, timestamp) %>%
nest() %>%
mutate(BLUEs = map(data, fit_SpATS, paste(year_site.UID_, timestamp), use_weights =FALSE, use_checks=TRUE))
df_BLUEs_for_fit_orig_ <- df_BLUEs_for_fit_orig_ %>% unnest(BLUEs)
df_BLUEs_for_fit_orig_ <- df_BLUEs_for_fit_orig_ %>%
mutate(year_site.UID = paste0(year_site.UID_, "_corrected"),
value = BLUE, value_se = BLUE_SE, plot.UID = paste0("BLUE_", genotype.id, year_site.UID))
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_reverse_SpATS.txt"))
df_values_for_fit_orig_$value_se <- 1
df_values_for_fit_orig <- bind_rows(df_values_for_fit_orig_, df_BLUEs_for_fit_orig_)
df_values_for_fit <- df_values_for_fit_orig
# Add timepoint of preliminar measurement and value delta (growth) to each timepoint
df_values_for_fit <- df_values_for_fit %>% group_by(plot.UID) %>%
arrange(timestamp) %>% mutate(lag_timestamp = lag(timestamp), value_delta = value - lag(value))
# Remove first measurements, they do not suite as delta measurement
df_values_for_fit <- df_values_for_fit %>% filter(!is.na(lag_timestamp))
# Calcualte growth rate
df_values_for_fit <- df_values_for_fit %>% group_by(plot.UID) %>%
mutate(time_diff = difftime(timestamp, lag_timestamp, units = "hours"))
###############################################
### Regain parameters from simulated data
df_values_original <- df_values_for_fit
df_values_for_fit <- df_values_for_fit %>% filter(plot.UID == "FIP20170378")
### Fit p-splines
print("Fit spline")
df_spline_model <- df_values_for_fit %>%
ungroup() %>%
group_by(plot.UID, year_site.UID) %>%
nest() %>%
mutate(spline_model = map(data,
~fit_scam_spline_weights(.$timestamp, .$value, .$value_se)))
df_spline_model <- df_spline_model %>% dplyr::select(-data)
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_fit_spline.txt"))
# Predict value with spline model
time_interval <- 60*60*12
prediction_timepoints <- df_values_for_fit %>%
group_by(year_site.UID) %>%
filter(timestamp > min(timestamp) & timestamp < max(timestamp)) %>%
summarize(prediction_timepoint = list(seq(round_any(min(timestamp), time_interval, f= ceiling),
round_any(max(timestamp) -days(5), time_interval, f= floor),
time_interval)))
predictions <- inner_join(df_spline_model, prediction_timepoints, by="year_site.UID")
df_spline_predicts <- predictions %>%
ungroup() %>%
group_by(plot.UID) %>%
do(spline_predicts = tibble(
predict = predict_scam_spline(.$spline_model[[1]], list(x=.$prediction_timepoint[[1]])),
predict_se = predict_scam_spline(.$spline_model[[1]], list(x=.$prediction_timepoint[[1]]), se = T),
predict_deriv = predict_scam_spline(.$spline_model[[1]], list(x=.$prediction_timepoint[[1]]), deriv = T),
timestamp = .$prediction_timepoint[[1]]))
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_predict_spline.txt"))
# Find start/stop and maximum height
print("Find start/stop")
df_growth_phase_predicts <- df_spline_predicts %>% ungroup() %>% group_by(plot.UID) %>%
mutate(growth_phase_params = map(spline_predicts, find_start_stop_growth_phase))
# Drop large spline objects
rm(df_spline_predicts)
gc()
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_extract_traits.txt"))
# Extract predicted values
df_growth_phase_predicts <- df_growth_phase_predicts %>%
unnest(growth_phase_params)
measurements <- df_values_for_fit %>% filter(plot.UID %in% plot_ids)
predicts <- df_growth_phase_predicts %>% filter(plot.UID %in% plot_ids) %>% unnest(spline_predicts)
predicts <- inner_join(predicts, df_designs %>% dplyr::select(plot.UID, genotype.id), by="plot.UID")
predicts <- inner_join(predicts, df_genotypes, by="genotype.id")
# Add predicted parameters to plots to perform two-stage processing
df_growth_phase_predicts_ <- df_growth_phase_predicts %>%
mutate(
predict_final_height = predict_final_value,
predict_final_height_se = predict_final_value_se,
predict_start_growth = yday(predict_start_growth),
predict_stop_growth = yday(predict_stop_growth)) %>%
dplyr::select(plot.UID,
predict_final_height, predict_final_height_se,
predict_start_growth, predict_start_growth_se,
predict_stop_growth, predict_stop_growth_se)
df_growth_phase_predicts_values <- df_growth_phase_predicts_ %>%
dplyr::select(plot.UID, predict_start_growth, predict_stop_growth, predict_final_height) %>%
pivot_longer(
cols= c(predict_final_height, predict_start_growth, predict_stop_growth), names_to = "parameter", values_to = "predict")
df_growth_phase_predicts_se <- df_growth_phase_predicts_ %>%
dplyr::select(plot.UID, predict_final_height_se,
predict_start_growth_se, predict_stop_growth_se) %>%
pivot_longer(
cols= c(
predict_start_growth_se, predict_stop_growth_se, predict_final_height_se), names_to = "parameter", values_to = "se") %>%
mutate(parameter = str_sub(parameter, 1, -4))
df_growth_phase_predicts_ <- inner_join(df_growth_phase_predicts_values, df_growth_phase_predicts_se, by=c("plot.UID", "parameter"))
df_growth_phase_predicts_BLUE <- df_growth_phase_predicts_ %>%
filter(endsWith(plot.UID, "_corrected"))
df_growth_phase_predicts_ <- df_growth_phase_predicts_ %>%
filter(!endsWith(plot.UID, "_corrected"))
# Comparison with true values
df_genotype_predicts <- df_growth_phase_predicts_ %>% ungroup() %>%
dplyr::select(plot.UID, parameter, predict, se) %>%
mutate(parameter = str_remove(parameter, "predict_")) %>%
inner_join(df_genotypes_yearsite_true, by=c("plot.UID", "parameter"))
## Percentile predictions
# Add predicted parameters to plots to perform two-stage processing
df_growth_phase_predicts_ <- df_growth_phase_predicts %>%
mutate(
predict_p15 = yday(predict_p15),
predict_p95 = yday(predict_p95)) %>%
dplyr::select(plot.UID,
predict_p15, predict_p15_se,
predict_p95, predict_p95_se)
df_growth_phase_predicts_values <- df_growth_phase_predicts_ %>%
dplyr::select(plot.UID, predict_p15, predict_p95) %>%
pivot_longer(
cols= c(predict_p15, predict_p95), names_to = "parameter", values_to = "predict")
df_growth_phase_predicts_se <- df_growth_phase_predicts_ %>%
dplyr::select(plot.UID,
predict_p15_se, predict_p95_se) %>%
pivot_longer(
cols= c(
predict_p15_se, predict_p95_se), names_to = "parameter", values_to = "se") %>%
mutate(parameter = str_sub(parameter, 1, -4))
df_growth_phase_predicts_ <- inner_join(df_growth_phase_predicts_values, df_growth_phase_predicts_se, by=c("plot.UID", "parameter"))
df_percentile_predicts_BLUE <- df_growth_phase_predicts_ %>%
filter(endsWith(plot.UID, "_corrected"))
df_growth_phase_predicts_ <- df_growth_phase_predicts_ %>%
filter(!endsWith(plot.UID, "_corrected"))
# Comparison with true values
df_genotype_predicts_percentile <- df_growth_phase_predicts_ %>% ungroup() %>%
dplyr::select(plot.UID, parameter, predict, se) %>%
mutate(parameter = str_remove(parameter, "predict_")) %>%
mutate(parameter = case_when(
parameter == "p15" ~ "start_growth",
parameter == "p95" ~ "stop_growth",
TRUE ~"unknown"
)) %>%
inner_join(df_genotypes_yearsite_true, by=c("plot.UID", "parameter"))
df_genotype_predicts$model <- "spline"
df_genotype_predicts_percentile$model <- "percentile"
# df_all_params <- bind_rows(df_genotype_predicts_lm_simple, df_genotype_predicts_asym_simple,
# df_genotype_predicts_simple, df_genotype_predicts_course, df_genotype_predicts_asym_course,
# df_genotype_predicts, df_genotype_predicts_percentile)
df_all_params <- bind_rows(df_genotype_predicts, df_genotype_predicts_percentile)
df_all_params <- inner_join(df_all_params, df_designs, by= c("plot.UID", "year_site.UID", "genotype.id"))
df_all_params <- df_all_params %>% mutate(label = paste(parameter, model, year_site.UID, sep = "_"))
df_all_params$set <- set
write_csv(df_all_params, paste0(path_simulation, "/", run, "/", set, "_plot_true_versus_predict.csv"))
df_all_params_BLUE <- bind_rows(df_growth_phase_predicts_BLUE, df_percentile_predicts_BLUE)
df_all_params_BLUE$set <- set
write_csv(df_all_params_BLUE, paste0(path_simulation, "/", run, "/", set, "_year_site_BLUE_predict.csv"))
rm(df_all_params)
rm(df_genotype_predicts)
rm(df_all_params_BLUE)
rm(df_growth_phase_predicts_)
rm(df_growth_phase_predicts)
gc()
} else {
print("Results present, skip")
}
}
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment