Commit e654b224 authored by luroth's avatar luroth
Browse files

multi-threading in stage 2 and 3 removed, makes asreml-R crashing...

parent 0eb158aa
# Working directory with temperature data
path_home <- 'E:/Scripts/htfp_data_processing'
path_simulation <- 'E:/Simulation/Runs'
path_simulation <- 'E:/Simulation/Runs_ALL'
setwd(path_home)
......@@ -14,6 +14,7 @@ library(gridExtra)
library(plyr)
library(stringr)
library(SpATS)
library(fs)
library(dplyr)
......@@ -29,7 +30,6 @@ start_run <- 1
max_runs <- 500
#500
number_of_cpus <- 5
sigma_error <- 10
......@@ -101,13 +101,13 @@ df_temp <- read_csv('Simulation/covariate_temp.csv')
# Use mutliprocessing for model fits
cluster <- new_cluster(number_of_cpus)
cluster_library(cluster, c("dplyr", "purrr", "SpATS"))
cluster_copy(cluster, c("fit_SpATS", "compute.mAIC", "compute.mBIC"))
#cluster <- new_cluster(number_of_cpus)
#cluster_library(cluster, c("dplyr", "purrr", "SpATS"))
#cluster_copy(cluster, c("fit_SpATS", "compute.mAIC", "compute.mBIC"))
run <- 19
run <- 1
for(run in start_run:max_runs) {
#try({
......@@ -117,46 +117,49 @@ for(run in start_run:max_runs) {
bplm_slope = bplm_Asym / (bplm_cOpt- bplm_c0))
df_genotypes_true <- df_genotypes_true %>%
pivot_longer(c(start_growth, stop_growth, bplm_c0, bplm_cOpt, bplm_Asym, bplm_slope), names_to = "parameter")
print(paste0("Run ", run))
i <- length(measurement_dates_sets)
for(i in length(measurement_dates_sets):1) {
measurement_dates_set_name = names(measurement_dates_sets)[[i]]
measurement_dates_set = measurement_dates_sets[[i]]
set <- paste0("set", str_replace_all(measurement_dates_set_name, " ", ""))
print(paste0("Set: ", measurement_dates_set_name))
files <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "*_genotype_true_versus_predict.csv"))
if(length(files)==0) {
files_stage1 <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_extract_traits.txt"))
files_stage23 <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_stage3_asreml_done.txt"))
if(length(files_stage1)>=1 & (length(files_stage23) == 0)) {
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_stage23_start.txt"))
df_all_params <- read_csv(paste0(path_simulation, "/", run, "/", set, "_plot_true_versus_predict.csv"))
df_all_params <- df_all_params %>% filter(!(model %in% c("asym_simple", "bplm_simple", "percentile")))
df_all_params <- df_all_params %>% filter(!(model %in% c("asym_simple", "bplm_simple", "percentile", "lm_simple", "asym_course", "bplm_course"))) %>%
filter(parameter != "final_height")
#----------------------------------------------------------------
# Adjusted means with SpATS: Stage 1
print("Fit stage 1 (SpATS)")
df_all_params <- df_all_params %>% mutate(value_true = value, value = predict)
df_params_yearsite_weighted <- df_all_params %>%
mutate(year_site.UID_ = year_site.UID) %>%
group_by(year_site.UID_, parameter, model) %>%
nest() %>%
partition(cluster) %>%
#partition(cluster) %>%
mutate(BLUEs = map(data, fit_SpATS, paste(model, parameter), use_weights =TRUE, use_checks=TRUE)) %>%
collect()
df_params_yearsite_weightedpost <- df_all_params %>%
mutate(year_site.UID_ = year_site.UID) %>%
mutate(se = sepost) %>%
mutate(se = sepost * sqrt(1000)) %>%
group_by(year_site.UID_, parameter, model) %>%
nest() %>%
partition(cluster) %>%
#partition(cluster) %>%
mutate(BLUEs = map(data, fit_SpATS, paste(model, parameter), use_weights =TRUE, use_checks=TRUE)) %>%
collect()
......@@ -164,10 +167,11 @@ for(run in start_run:max_runs) {
mutate(year_site.UID_ = year_site.UID) %>%
group_by(year_site.UID_, parameter, model) %>%
nest() %>%
partition(cluster) %>%
#partition(cluster) %>%
mutate(BLUEs = map(data, fit_SpATS, paste(model, parameter), use_weights =FALSE, use_checks=TRUE)) %>%
collect()
df_params_BLUES_yearsite_weighted <- df_params_yearsite_weighted %>% unnest(BLUEs)
df_params_BLUES_yearsite_weighted <- df_params_BLUES_yearsite_weighted %>% select(-data)
df_params_BLUES_yearsite_weighted$stage1 <- "weighted"
......@@ -177,56 +181,60 @@ for(run in start_run:max_runs) {
df_params_BLUES_yearsite_not_weighted <- df_params_yearsite_not_weighted %>% unnest(BLUEs)
df_params_BLUES_yearsite_not_weighted <- df_params_BLUES_yearsite_not_weighted %>% select(-data)
df_params_BLUES_yearsite_not_weighted$stage1 <- "not weighted"
df_params_BLUES_yearsite <- bind_rows(df_params_BLUES_yearsite_weighted, df_params_BLUES_yearsite_not_weighted, df_params_BLUES_yearsite_weightedpost)
#df_params_BLUES_yearsite <- bind_rows(df_params_BLUES_yearsite_weighted, df_params_BLUES_yearsite_not_weighted)
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_stage2_SpATS_done.txt"))
#----------------------------------------------------------------
# Adjusted means with asreml: Stage 2
print("Fit stage 2 (asreml-R)")
df_BLUEs_weights <- df_params_BLUES_yearsite %>% filter(!is.na(use_weights)) %>%
mutate(year_site.UID = year_site.UID_) %>% group_by(parameter, model, stage1) %>%
nest() %>% mutate(BLUEs = map(data, fit_REML, parameter = paste(parameter, model), use_weights=T)) %>%
select(-data) %>% unnest(BLUEs) %>% unnest(BLUE)
df_BLUEs_weights$stage2 <- "weighted"
df_BLUEs_no_weights <- df_params_BLUES_yearsite %>% filter(!is.na(use_weights)) %>%
mutate(year_site.UID = year_site.UID_) %>% group_by(parameter, model, stage1) %>%
nest() %>% mutate(BLUEs = map(data, fit_REML, parameter = paste(parameter, model), use_weights=F)) %>%
select(-data) %>% unnest(BLUEs) %>% unnest(BLUE)
df_BLUEs_no_weights$stage2 <- "not weighted"
df_BLUES <- bind_rows(df_BLUEs_weights, df_BLUEs_no_weights)
df_BLUES <- df_BLUES %>% rename(predict = BLUE)
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_stage3_asreml_done.txt"))
# Validate with true values
df_BLUES_validate <- inner_join(df_genotypes_true, df_BLUES, by=c("genotype.id", "parameter"))
df_BLUES_validate <- df_BLUES_validate %>% mutate(label = paste(parameter, model, stage1, stage2, sep="_"))
# plot <- ggplot(data = df_BLUES_validate, aes(value, predict)) +
# geom_point() +
# geom_abline(intercept = 0, slope=1) +
# facet_wrap(~label, scales ="free") +
# ggtitle(paste("Run:", run, set))
# plot(plot)
df_BLUES_validate$set <- set
write_csv(df_BLUES_validate, paste0(path_simulation, "/", run, "/", set, "_genotype_true_versus_predict.csv"))
if (FALSE & run == 1 & set == "set3d") {
df_spline_predicts <- read_csv(paste0(path_simulation, "/", run, "/", set, "_spline_predict.csv"))
params_course_bplm_long <- df_BLUES_validate %>% filter(model == "bplm_course", stage1 == "weighted", stage2 == "weighted") %>%
select(genotype.id, parameter, predict) %>% pivot_wider(names_from = parameter, values_from = predict) %>%
filter(bplm_cOpt >0)
params_course_asym_long <- df_BLUES_validate %>% filter(model == "asym_course", stage1 == "weighted", stage2 == "weighted") %>%
select(genotype.id, parameter, predict) %>% pivot_wider(names_from = parameter, values_from = predict)
# Growth response curves
df_growth_response <- data.frame(
temp = seq(0, 30, 0.01))
......@@ -234,14 +242,14 @@ for(run in start_run:max_runs) {
df_growth_response <- df_growth_response %>%
mutate(sim_lm =
growth_response_bp_linear(df_growth_response$temp, bplm_c0, bplm_cOpt, bplm_Asym))
df_growth_response <- data.frame(
temp = seq(0, 30, 0.01))
df_growth_response <- expand_grid(df_growth_response, params_course_asym_long)
df_growth_response <- df_growth_response %>%
mutate(sim_lm =
growth_response_asym(df_growth_response$temp, bplm_Asym, bplm_slope, bplm_c0))
growth_response_asym(df_growth_response$temp, bplm_Asym, bplm_slope, bplm_c0))
df_growth_response$sim <- "Genotypic"
plot_models <- ggplot(data = df_growth_response) +
geom_line(aes(x = temp, y = sim_lm, group = genotype.id), alpha = 0.4) +
......@@ -250,7 +258,7 @@ for(run in start_run:max_runs) {
facet_wrap(~sim ) +
theme +
theme(legend.position = "bottom")
df_spline_ <- df_spline_predicts
df_spline_ <- inner_join(df_spline_, df_designs)
#df_growth_phase_predicts_ <- inner_join(df_growth_phase_predicts, df_designs)
......@@ -260,13 +268,13 @@ for(run in start_run:max_runs) {
scale_x_datetime(NULL) +
scale_y_continuous("Fitted canopy height (m)") +
theme
plot_all <- grid.arrange(plot_models, plot_growth, widths = c(1, 2))
ggsave(filename = paste0("Graphs/Simulation/Overview_fits.pdf"), plot_all, width = 14, height = 6)
}
} else {
print("Results present, skip")
}
......
# Working directory with temperature data
path_home <- 'E:/Scripts/htfp_data_processing'
path_simulation <- 'E:/Simulation/Runs'
path_simulation <- 'E:/Simulation/Runs_ALL'
setwd(path_home)
......@@ -14,17 +14,17 @@ library(gridExtra)
library(plyr)
library(stringr)
library(asreml)
library(fs)
library(dplyr)
source("R/Model/FitREML.R")
source("R/Model/Graphs.R")
start_run <- 1
start_run <- 473
max_runs <- 500
#500
number_of_cpus <- 5
sigma_error <- 10
......@@ -95,9 +95,9 @@ df_temp <- read_csv('Simulation/covariate_temp.csv')
run <- 19
run <- 141
for(run in start_run:max_runs) {
gc()
#try({
# Validation data
df_genotypes_true <- read_csv(paste0(path_simulation, "/", run, "/genotypes_params.csv"))
......@@ -105,35 +105,47 @@ for(run in start_run:max_runs) {
bplm_slope = bplm_Asym / (bplm_cOpt- bplm_c0))
df_genotypes_true <- df_genotypes_true %>%
pivot_longer(c(start_growth, stop_growth, bplm_c0, bplm_cOpt, bplm_Asym, bplm_slope), names_to = "parameter")
print(paste0("Run ", run))
i <- length(measurement_dates_sets)
for(i in length(measurement_dates_sets):1) {
measurement_dates_set_name = names(measurement_dates_sets)[[i]]
measurement_dates_set = measurement_dates_sets[[i]]
set <- paste0("set", str_replace_all(measurement_dates_set_name, " ", ""))
print(paste0("Set: ", measurement_dates_set_name))
files_stage1 <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_extract_traits.txt"))
files_onestage <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_onestage_done.txt"))
files <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "*_genotype_true_versus_predict_onestage.csv"))
if(TRUE | length(files)==0) {
if(length(files_onestage)>=1) {
df_test <- read_csv(paste0(path_simulation, "/", run, "/", set, "_genotype_true_versus_predict_onestage.csv"))
if(sum(is.na(df_test$predict))> 20) {
files_onestage <- c()
print("MANY NA, redo run")
}
}
if(length(files_stage1) >= 1 & (length(files_onestage)==0)) {
df_all_params <- read_csv(paste0(path_simulation, "/", run, "/", set, "_plot_true_versus_predict.csv"))
df_all_params <- df_all_params %>% filter(model %in% c("spline"), parameter != "final_height")
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_onestage_start.txt"))
#----------------------------------------------------------------
# Adjusted means in one stage
df <- df_all_params %>%
mutate(year_site.UID_ = year_site.UID) %>%
group_by(parameter, model) %>%
nest()
df <- df$data[[1]]
fit_onestage_asreml <- function(df) {
# Set factors
df <- df %>% mutate(genotype = as.factor(genotype.id), year_site.UID = as.factor(year_site.UID),
......@@ -150,25 +162,28 @@ for(run in start_run:max_runs) {
select(genotype) %>%
unique()
df <- df %>% filter(genotype %in% genotype_set$genotype)
# Remove NA values
df <- df %>% filter(!is.na(value))
# Re-expand to genotype and year-site set to have full design matrix
genotype_set <- expand_grid(year_site.UID = df$year_site.UID %>% unique(),
genotype = df$genotype %>% unique())
df <- left_join(genotype_set, df, by = c("year_site.UID", "genotype"))
# Drop unused (genotype) levels
df <- droplevels(df)
# Arrange df
df <- df %>% arrange(year_site.UID, genotype)
weights_ <- 1/df$se^2
maxiter <- 1500
weights_ <- 1/(df$se^2)
weights_post <- 1/((df$sepost * sqrt(1000))^2)
weights_post <- if_else(is.finite(weights_post), weights_post, 1)
maxiter <- 50
df_BLUE <- data.frame()
print("weights")
fit_BLUE <- NULL
try({
fit_BLUE <- asreml(value ~ 1 + genotype + year_site.UID, data = df, trace = F,
......@@ -177,7 +192,9 @@ for(run in start_run:max_runs) {
weights = weights_,
na.action = list(y = "include", x = "omit"),
maxit = maxiter,
fail="soft")
workspace = "20gb"
#fail="soft"
)
summary(fit_BLUE)
g.predBLUE <- predict(fit_BLUE, classify = "genotype")
df_BLUE_ <- data.frame(g.predBLUE$pvals[, c(1, 2)])
......@@ -186,20 +203,51 @@ for(run in start_run:max_runs) {
if(is.null(fit_BLUE)) {
df_BLUE_ <- data.frame(genotype.id = df$genotype.id %>% unique(), BLUE =NA)
}
df_BLUE_ <- df_BLUE_ %>% mutate(genotype.id = as.numeric(as.character(genotype.id)))
df_BLUE_$stage1 <- "weighted"
df_BLUE_$stage2 <- "onestage"
df_BLUE <- bind_rows(df_BLUE, df_BLUE_)
gc()
print("post weights")
fit_BLUE <- NULL
try({
fit_BLUE <- asreml(value ~ 1 + genotype + year_site.UID, data = df, trace = F,
random = ~ at(year_site.UID):ar1v(plot.discrete_x):ar1(plot.discrete_y),
residual = ~dsum(~units | year_site.UID),
weights = weights_post,
na.action = list(y = "include", x = "omit"),
maxit = maxiter,
workspace = "20gb"
#fail="soft"
)
summary(fit_BLUE)
g.predBLUE <- predict(fit_BLUE, classify = "genotype")
df_BLUE_ <- data.frame(g.predBLUE$pvals[, c(1, 2)])
names(df_BLUE_) <- c("genotype.id", "BLUE")
})
if(is.null(fit_BLUE)) {
df_BLUE_ <- data.frame(genotype.id = df$genotype.id %>% unique(), BLUE =NA)
}
df_BLUE_ <- df_BLUE_ %>% mutate(genotype.id = as.numeric(as.character(genotype.id)))
df_BLUE_$stage1 <- "weighted (posterior simulation)"
df_BLUE_$stage2 <- "onestage"
df_BLUE <- bind_rows(df_BLUE, df_BLUE_)
gc()
print("no weights")
fit_BLUE <- NULL
try({
fit_BLUE <- asreml(value ~ 1 + genotype + year_site.UID, data = df, trace = F,
random = ~at(year_site.UID):plot.discrete_x:plot.discrete_y + at(year_site.UID):ar1v(plot.discrete_x):ar1(plot.discrete_y),
random = ~at(year_site.UID):ar1v(plot.discrete_x):ar1(plot.discrete_y),
residual = ~dsum(~units | year_site.UID),
na.action = list(y = "include", x = "omit"),
maxit = maxiter,
fail="soft")
workspace = "20gb"
#fail="soft"
)
summary(fit_BLUE)
g.predBLUE <- predict(fit_BLUE, classify = "genotype")
df_BLUE_ <- data.frame(g.predBLUE$pvals[, c(1, 2)])
......@@ -208,14 +256,15 @@ for(run in start_run:max_runs) {
if(is.null(fit_BLUE)) {
df_BLUE_ <- data.frame(genotype.id = df$genotype.id %>% unique(), BLUE =NA)
}
df_BLUE_ <- df_BLUE_ %>% mutate(genotype.id = as.numeric(as.character(genotype.id)))
df_BLUE_$stage1 <- "not weighted"
df_BLUE_$stage2 <- "onestage"
df_BLUE <- bind_rows(df_BLUE, df_BLUE_)
gc()
return(df_BLUE)
}
df_all <- df_params_yearsite_weighted <- df_all_params %>%
mutate(year_site.UID_ = year_site.UID) %>%
group_by(parameter, model) %>%
......@@ -223,25 +272,26 @@ for(run in start_run:max_runs) {
mutate(BLUEs = map(data, fit_onestage_asreml)) %>%
select(-data)
df_all <- df_all %>% unnest(BLUEs)
df_BLUES <- df_all %>% rename(predict = BLUE)
# Validate with true values
df_BLUES_validate <- inner_join(df_genotypes_true, df_BLUES, by=c("genotype.id", "parameter"))
df_BLUES_validate <- df_BLUES_validate %>% mutate(label = paste(parameter, model, stage1, stage2, sep="_"))
df_BLUES_validate$set <- set
write_csv(df_BLUES_validate, paste0(path_simulation, "/", run, "/", set, "_genotype_true_versus_predict_onestage.csv"))
plot <- ggplot(data = df_BLUES_validate, aes(value, predict)) +
geom_point() +
geom_abline(intercept = 0, slope=1) +
facet_wrap(~label, scales ="free") +
ggtitle(paste("Run:", run, set))
plot(plot)
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_onestage_done.txt"))
# plot <- ggplot(data = df_BLUES_validate, aes(value, predict)) +
# geom_point() +
# geom_abline(intercept = 0, slope=1) +
# facet_wrap(~label, scales ="free") +
# ggtitle(paste("Run:", run, set))
# plot(plot)
} else {
print("Results present, skip")
}
......
# Working directory with temperature data
path_home <- 'E:/Scripts/htfp_data_processing'
path_simulation <- 'E:/Simulation/Runs'
path_simulation <- 'E:/Simulation/Runs_ALL'
setwd(path_home)
......@@ -14,6 +14,7 @@ library(gridExtra)
library(plyr)
library(stringr)
library(asreml)
library(fs)
library(dplyr)
......@@ -24,8 +25,6 @@ start_run <- 1
max_runs <- 500
#500
number_of_cpus <- 5
sigma_error <- 10
# Plot control overview
......@@ -95,7 +94,7 @@ df_temp <- read_csv('Simulation/covariate_temp.csv')
run <- 19
run <- 1
for(run in start_run:max_runs) {
#try({
......@@ -118,29 +117,48 @@ for(run in start_run:max_runs) {
print(paste0("Set: ", measurement_dates_set_name))
files <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "*_genotype_true_versus_predict_onestage.csv"))
files <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_extract_traits.txt"))
files_res <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_reverse_done.txt"))
if(TRUE | length(files)==0) {
if(length(files)==1 & (length(files_res)== 0)) {
try({
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_reverse_start.txt"))
df_params_BLUES_yearsite <- read_csv(paste0(path_simulation, "/", run, "/", set, "_year_site_BLUE_predict.csv"))
df_params_BLUES_yearsite <- df_params_BLUES_yearsite %>% filter(model %in% c("spline"), parameter != "final_height")
df_params_BLUES_yearsite$weights_vcov <- 1/(df_params_BLUES_yearsite$BLUE_SE^2)
df_params_BLUES_yearsite <- df_params_BLUES_yearsite %>% filter(parameter %in% c("predict_start_growth", "predict_stop_growth"))
df_params_BLUES_yearsite <- df_params_BLUES_yearsite %>% mutate(parameter = substr(parameter, 9, 30),
genotype.id = as.numeric(gsub("BLUE_([0-9]+)FIP[0-9]+_corrected", "\\1", plot.UID)),
year_site.UID_ = gsub("BLUE_[0-9]+(FIP[0-9]+)_corrected", "\\1", plot.UID),
BLUE = predict)
df_params_BLUES_yearsite$model <- "spline"
df_params_BLUES_yearsite$stage1 <- "weighted"
df_params_BLUES_yearsite$weights_vcov <- 1/(df_params_BLUES_yearsite$se^2)
print("Fit stage 2 (asreml-R)")
df_BLUEs_weights <- df_params_BLUES_yearsite %>% filter(!is.na(use_weights)) %>%
df_BLUEs_weights <- df_params_BLUES_yearsite %>%
mutate(year_site.UID = year_site.UID_) %>% group_by(parameter, model, stage1) %>%
nest() %>% mutate(BLUEs = map(data, fit_REML, parameter = paste(parameter, model), use_weights=T)) %>%
select(-data) %>% unnest(BLUEs) %>% unnest(BLUE)
df_BLUEs_weights$stage2 <- "weighted"
df_BLUEs_no_weights <- df_params_BLUES_yearsite %>% filter(!is.na(use_weights)) %>%
df_BLUEs_no_weights <- df_params_BLUES_yearsite %>%
mutate(year_site.UID = year_site.UID_) %>% group_by(parameter, model, stage1) %>%
nest() %>% mutate(BLUEs = map(data, fit_REML, parameter = paste(parameter, model), use_weights=F)) %>%
select(-data) %>% unnest(BLUEs) %>% unnest(BLUE)
df_BLUEs_no_weights$stage2 <- "not weighted"
df_BLUES <- bind_rows(df_BLUEs_weights, df_BLUEs_no_weights)
df_params_BLUES_yearsite$weights_vcov <- 1/((df_params_BLUES_yearsite$sepost * sqrt(1000))^2)
df_params_BLUES_yearsite$weights_vcov <- if_else(is.finite(df_params_BLUES_yearsite$weights_vcov), df_params_BLUES_yearsite$weights_vcov, 1)
df_BLUEs_no_postweights <- df_params_BLUES_yearsite %>%
mutate(year_site.UID = year_site.UID_) %>% group_by(parameter, model, stage1) %>%
nest() %>% mutate(BLUEs = map(data, fit_REML, parameter = paste(parameter, model), use_weights=T)) %>%
select(-data) %>% unnest(BLUEs) %>% unnest(BLUE)
df_BLUEs_no_postweights$stage2 <- "weighted (posterior simulation)"