Commit e654b224 authored by luroth's avatar luroth
Browse files

multi-threading in stage 2 and 3 removed, makes asreml-R crashing...

parent 0eb158aa
# Working directory with temperature data
path_home <- 'E:/Scripts/htfp_data_processing'
path_simulation <- 'E:/Simulation/Runs'
path_simulation <- 'E:/Simulation/Runs_ALL'
setwd(path_home)
......@@ -14,6 +14,7 @@ library(gridExtra)
library(plyr)
library(stringr)
library(SpATS)
library(fs)
library(dplyr)
......@@ -29,7 +30,6 @@ start_run <- 1
max_runs <- 500
#500
number_of_cpus <- 5
sigma_error <- 10
......@@ -102,12 +102,12 @@ df_temp <- read_csv('Simulation/covariate_temp.csv')
# Use mutliprocessing for model fits
cluster <- new_cluster(number_of_cpus)
cluster_library(cluster, c("dplyr", "purrr", "SpATS"))
cluster_copy(cluster, c("fit_SpATS", "compute.mAIC", "compute.mBIC"))
#cluster <- new_cluster(number_of_cpus)
#cluster_library(cluster, c("dplyr", "purrr", "SpATS"))
#cluster_copy(cluster, c("fit_SpATS", "compute.mAIC", "compute.mBIC"))
run <- 19
run <- 1
for(run in start_run:max_runs) {
#try({
......@@ -130,12 +130,15 @@ for(run in start_run:max_runs) {
print(paste0("Set: ", measurement_dates_set_name))
files <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "*_genotype_true_versus_predict.csv"))
files_stage1 <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_extract_traits.txt"))
files_stage23 <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_stage3_asreml_done.txt"))
if(length(files)==0) {
if(length(files_stage1)>=1 & (length(files_stage23) == 0)) {
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_stage23_start.txt"))
df_all_params <- read_csv(paste0(path_simulation, "/", run, "/", set, "_plot_true_versus_predict.csv"))
df_all_params <- df_all_params %>% filter(!(model %in% c("asym_simple", "bplm_simple", "percentile")))
df_all_params <- df_all_params %>% filter(!(model %in% c("asym_simple", "bplm_simple", "percentile", "lm_simple", "asym_course", "bplm_course"))) %>%
filter(parameter != "final_height")
#----------------------------------------------------------------
# Adjusted means with SpATS: Stage 1
......@@ -147,16 +150,16 @@ for(run in start_run:max_runs) {
mutate(year_site.UID_ = year_site.UID) %>%
group_by(year_site.UID_, parameter, model) %>%
nest() %>%
partition(cluster) %>%
#partition(cluster) %>%
mutate(BLUEs = map(data, fit_SpATS, paste(model, parameter), use_weights =TRUE, use_checks=TRUE)) %>%
collect()
df_params_yearsite_weightedpost <- df_all_params %>%
mutate(year_site.UID_ = year_site.UID) %>%
mutate(se = sepost) %>%
mutate(se = sepost * sqrt(1000)) %>%
group_by(year_site.UID_, parameter, model) %>%
nest() %>%
partition(cluster) %>%
#partition(cluster) %>%
mutate(BLUEs = map(data, fit_SpATS, paste(model, parameter), use_weights =TRUE, use_checks=TRUE)) %>%
collect()
......@@ -164,10 +167,11 @@ for(run in start_run:max_runs) {
mutate(year_site.UID_ = year_site.UID) %>%
group_by(year_site.UID_, parameter, model) %>%
nest() %>%
partition(cluster) %>%
#partition(cluster) %>%
mutate(BLUEs = map(data, fit_SpATS, paste(model, parameter), use_weights =FALSE, use_checks=TRUE)) %>%
collect()
df_params_BLUES_yearsite_weighted <- df_params_yearsite_weighted %>% unnest(BLUEs)
df_params_BLUES_yearsite_weighted <- df_params_BLUES_yearsite_weighted %>% select(-data)
df_params_BLUES_yearsite_weighted$stage1 <- "weighted"
......@@ -179,7 +183,9 @@ for(run in start_run:max_runs) {
df_params_BLUES_yearsite_not_weighted$stage1 <- "not weighted"
df_params_BLUES_yearsite <- bind_rows(df_params_BLUES_yearsite_weighted, df_params_BLUES_yearsite_not_weighted, df_params_BLUES_yearsite_weightedpost)
#df_params_BLUES_yearsite <- bind_rows(df_params_BLUES_yearsite_weighted, df_params_BLUES_yearsite_not_weighted)
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_stage2_SpATS_done.txt"))
#----------------------------------------------------------------
# Adjusted means with asreml: Stage 2
......@@ -201,6 +207,8 @@ for(run in start_run:max_runs) {
df_BLUES <- bind_rows(df_BLUEs_weights, df_BLUEs_no_weights)
df_BLUES <- df_BLUES %>% rename(predict = BLUE)
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_stage3_asreml_done.txt"))
# Validate with true values
df_BLUES_validate <- inner_join(df_genotypes_true, df_BLUES, by=c("genotype.id", "parameter"))
......
# Working directory with temperature data
path_home <- 'E:/Scripts/htfp_data_processing'
path_simulation <- 'E:/Simulation/Runs'
path_simulation <- 'E:/Simulation/Runs_ALL'
setwd(path_home)
......@@ -14,17 +14,17 @@ library(gridExtra)
library(plyr)
library(stringr)
library(asreml)
library(fs)
library(dplyr)
source("R/Model/FitREML.R")
source("R/Model/Graphs.R")
start_run <- 1
start_run <- 473
max_runs <- 500
#500
number_of_cpus <- 5
sigma_error <- 10
......@@ -95,9 +95,9 @@ df_temp <- read_csv('Simulation/covariate_temp.csv')
run <- 19
run <- 141
for(run in start_run:max_runs) {
gc()
#try({
# Validation data
df_genotypes_true <- read_csv(paste0(path_simulation, "/", run, "/genotypes_params.csv"))
......@@ -118,13 +118,25 @@ for(run in start_run:max_runs) {
print(paste0("Set: ", measurement_dates_set_name))
files <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "*_genotype_true_versus_predict_onestage.csv"))
files_stage1 <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_extract_traits.txt"))
files_onestage <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_onestage_done.txt"))
if(length(files_onestage)>=1) {
df_test <- read_csv(paste0(path_simulation, "/", run, "/", set, "_genotype_true_versus_predict_onestage.csv"))
if(sum(is.na(df_test$predict))> 20) {
files_onestage <- c()
print("MANY NA, redo run")
}
if(TRUE | length(files)==0) {
}
if(length(files_stage1) >= 1 & (length(files_onestage)==0)) {
df_all_params <- read_csv(paste0(path_simulation, "/", run, "/", set, "_plot_true_versus_predict.csv"))
df_all_params <- df_all_params %>% filter(model %in% c("spline"), parameter != "final_height")
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_onestage_start.txt"))
#----------------------------------------------------------------
# Adjusted means in one stage
......@@ -164,11 +176,14 @@ for(run in start_run:max_runs) {
# Arrange df
df <- df %>% arrange(year_site.UID, genotype)
weights_ <- 1/df$se^2
maxiter <- 1500
weights_ <- 1/(df$se^2)
weights_post <- 1/((df$sepost * sqrt(1000))^2)
weights_post <- if_else(is.finite(weights_post), weights_post, 1)
maxiter <- 50
df_BLUE <- data.frame()
print("weights")
fit_BLUE <- NULL
try({
fit_BLUE <- asreml(value ~ 1 + genotype + year_site.UID, data = df, trace = F,
......@@ -177,7 +192,9 @@ for(run in start_run:max_runs) {
weights = weights_,
na.action = list(y = "include", x = "omit"),
maxit = maxiter,
fail="soft")
workspace = "20gb"
#fail="soft"
)
summary(fit_BLUE)
g.predBLUE <- predict(fit_BLUE, classify = "genotype")
df_BLUE_ <- data.frame(g.predBLUE$pvals[, c(1, 2)])
......@@ -191,15 +208,46 @@ for(run in start_run:max_runs) {
df_BLUE_$stage1 <- "weighted"
df_BLUE_$stage2 <- "onestage"
df_BLUE <- bind_rows(df_BLUE, df_BLUE_)
gc()
print("post weights")
fit_BLUE <- NULL
try({
fit_BLUE <- asreml(value ~ 1 + genotype + year_site.UID, data = df, trace = F,
random = ~ at(year_site.UID):ar1v(plot.discrete_x):ar1(plot.discrete_y),
residual = ~dsum(~units | year_site.UID),
weights = weights_post,
na.action = list(y = "include", x = "omit"),
maxit = maxiter,
workspace = "20gb"
#fail="soft"
)
summary(fit_BLUE)
g.predBLUE <- predict(fit_BLUE, classify = "genotype")
df_BLUE_ <- data.frame(g.predBLUE$pvals[, c(1, 2)])
names(df_BLUE_) <- c("genotype.id", "BLUE")
})
if(is.null(fit_BLUE)) {
df_BLUE_ <- data.frame(genotype.id = df$genotype.id %>% unique(), BLUE =NA)
}
df_BLUE_ <- df_BLUE_ %>% mutate(genotype.id = as.numeric(as.character(genotype.id)))
df_BLUE_$stage1 <- "weighted (posterior simulation)"
df_BLUE_$stage2 <- "onestage"
df_BLUE <- bind_rows(df_BLUE, df_BLUE_)
gc()
print("no weights")
fit_BLUE <- NULL
try({
fit_BLUE <- asreml(value ~ 1 + genotype + year_site.UID, data = df, trace = F,
random = ~at(year_site.UID):plot.discrete_x:plot.discrete_y + at(year_site.UID):ar1v(plot.discrete_x):ar1(plot.discrete_y),
random = ~at(year_site.UID):ar1v(plot.discrete_x):ar1(plot.discrete_y),
residual = ~dsum(~units | year_site.UID),
na.action = list(y = "include", x = "omit"),
maxit = maxiter,
fail="soft")
workspace = "20gb"
#fail="soft"
)
summary(fit_BLUE)
g.predBLUE <- predict(fit_BLUE, classify = "genotype")
df_BLUE_ <- data.frame(g.predBLUE$pvals[, c(1, 2)])
......@@ -213,6 +261,7 @@ for(run in start_run:max_runs) {
df_BLUE_$stage1 <- "not weighted"
df_BLUE_$stage2 <- "onestage"
df_BLUE <- bind_rows(df_BLUE, df_BLUE_)
gc()
return(df_BLUE)
}
......@@ -234,13 +283,14 @@ for(run in start_run:max_runs) {
df_BLUES_validate$set <- set
write_csv(df_BLUES_validate, paste0(path_simulation, "/", run, "/", set, "_genotype_true_versus_predict_onestage.csv"))
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_onestage_done.txt"))
plot <- ggplot(data = df_BLUES_validate, aes(value, predict)) +
geom_point() +
geom_abline(intercept = 0, slope=1) +
facet_wrap(~label, scales ="free") +
ggtitle(paste("Run:", run, set))
plot(plot)
# plot <- ggplot(data = df_BLUES_validate, aes(value, predict)) +
# geom_point() +
# geom_abline(intercept = 0, slope=1) +
# facet_wrap(~label, scales ="free") +
# ggtitle(paste("Run:", run, set))
# plot(plot)
} else {
print("Results present, skip")
......
# Working directory with temperature data
path_home <- 'E:/Scripts/htfp_data_processing'
path_simulation <- 'E:/Simulation/Runs'
path_simulation <- 'E:/Simulation/Runs_ALL'
setwd(path_home)
......@@ -14,6 +14,7 @@ library(gridExtra)
library(plyr)
library(stringr)
library(asreml)
library(fs)
library(dplyr)
......@@ -24,8 +25,6 @@ start_run <- 1
max_runs <- 500
#500
number_of_cpus <- 5
sigma_error <- 10
# Plot control overview
......@@ -95,7 +94,7 @@ df_temp <- read_csv('Simulation/covariate_temp.csv')
run <- 19
run <- 1
for(run in start_run:max_runs) {
#try({
......@@ -118,29 +117,48 @@ for(run in start_run:max_runs) {
print(paste0("Set: ", measurement_dates_set_name))
files <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "*_genotype_true_versus_predict_onestage.csv"))
files <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_extract_traits.txt"))
files_res <- Sys.glob(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_reverse_done.txt"))
if(length(files)==1 & (length(files_res)== 0)) {
if(TRUE | length(files)==0) {
try({
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_reverse_start.txt"))
df_params_BLUES_yearsite <- read_csv(paste0(path_simulation, "/", run, "/", set, "_year_site_BLUE_predict.csv"))
df_params_BLUES_yearsite <- df_params_BLUES_yearsite %>% filter(model %in% c("spline"), parameter != "final_height")
df_params_BLUES_yearsite$weights_vcov <- 1/(df_params_BLUES_yearsite$BLUE_SE^2)
df_params_BLUES_yearsite <- df_params_BLUES_yearsite %>% filter(parameter %in% c("predict_start_growth", "predict_stop_growth"))
df_params_BLUES_yearsite <- df_params_BLUES_yearsite %>% mutate(parameter = substr(parameter, 9, 30),
genotype.id = as.numeric(gsub("BLUE_([0-9]+)FIP[0-9]+_corrected", "\\1", plot.UID)),
year_site.UID_ = gsub("BLUE_[0-9]+(FIP[0-9]+)_corrected", "\\1", plot.UID),
BLUE = predict)
df_params_BLUES_yearsite$model <- "spline"
df_params_BLUES_yearsite$stage1 <- "weighted"
df_params_BLUES_yearsite$weights_vcov <- 1/(df_params_BLUES_yearsite$se^2)
print("Fit stage 2 (asreml-R)")
df_BLUEs_weights <- df_params_BLUES_yearsite %>% filter(!is.na(use_weights)) %>%
df_BLUEs_weights <- df_params_BLUES_yearsite %>%
mutate(year_site.UID = year_site.UID_) %>% group_by(parameter, model, stage1) %>%
nest() %>% mutate(BLUEs = map(data, fit_REML, parameter = paste(parameter, model), use_weights=T)) %>%
select(-data) %>% unnest(BLUEs) %>% unnest(BLUE)
df_BLUEs_weights$stage2 <- "weighted"
df_BLUEs_no_weights <- df_params_BLUES_yearsite %>% filter(!is.na(use_weights)) %>%
df_BLUEs_no_weights <- df_params_BLUES_yearsite %>%
mutate(year_site.UID = year_site.UID_) %>% group_by(parameter, model, stage1) %>%
nest() %>% mutate(BLUEs = map(data, fit_REML, parameter = paste(parameter, model), use_weights=F)) %>%
select(-data) %>% unnest(BLUEs) %>% unnest(BLUE)
df_BLUEs_no_weights$stage2 <- "not weighted"
df_BLUES <- bind_rows(df_BLUEs_weights, df_BLUEs_no_weights)
df_params_BLUES_yearsite$weights_vcov <- 1/((df_params_BLUES_yearsite$sepost * sqrt(1000))^2)
df_params_BLUES_yearsite$weights_vcov <- if_else(is.finite(df_params_BLUES_yearsite$weights_vcov), df_params_BLUES_yearsite$weights_vcov, 1)
df_BLUEs_no_postweights <- df_params_BLUES_yearsite %>%
mutate(year_site.UID = year_site.UID_) %>% group_by(parameter, model, stage1) %>%
nest() %>% mutate(BLUEs = map(data, fit_REML, parameter = paste(parameter, model), use_weights=T)) %>%
select(-data) %>% unnest(BLUEs) %>% unnest(BLUE)
df_BLUEs_no_postweights$stage2 <- "weighted (posterior simulation)"
df_BLUES <- bind_rows(df_BLUEs_weights, df_BLUEs_no_weights, df_BLUEs_no_postweights)
df_BLUES <- df_BLUES %>% rename(predict = BLUE)
# Validate with true values
......@@ -157,6 +175,8 @@ for(run in start_run:max_runs) {
df_BLUES_validate$set <- set
write_csv(df_BLUES_validate, paste0(path_simulation, "/", run, "/", set, "_reversegenotype_true_versus_predict.csv"))
file_touch(paste0(path_simulation, "/", run, "/", set, "_MILESTONE_reverse_done.txt"))
})
} else {
print("Results present, skip")
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment