Commit 570c2109 authored by Tim Vaughan's avatar Tim Vaughan
Browse files

Converting pipeline to DSL 2.

parent 3748245b
#!/usr/bin/env nextflow
nextflow.enable.dsl=1
nextflow.enable.dsl=2
/*
Desired analyses:
......@@ -16,13 +16,13 @@ process annotate_sequences {
cache 'lenient'
publishDir 'sequences', mode: 'copy'
input:
path sequences from seq_url
path metadata from details_url
path sequences
path metadata
output:
tuple path('aligned_sequences_annotated.fasta'),
path('specialAges.txt') into annotated_sequences
path 'aligned_sequences_known_dates.fasta' into dated_sequences
path('specialAges.txt'), emit: annotated_sequences
path 'aligned_sequences_known_dates.fasta', emit: dated_sequences
path metadata
"""
......@@ -32,14 +32,16 @@ process annotate_sequences {
library(lubridate)
library(bioseq)
to_exclude = c("MPXV_USA_2022_FL001", "MPXV_USA_2022_VA001", "MPXV_1_IT_Milan_2022")
to_exclude_accession = c("OP295382", "OP295383", "ON782054", "OP331335", "OP331336")
to_exclude_strain = c("MPXV_USA_2022_FL001", "MPXV_USA_2022_VA001", "MPXV_1_IT_Milan_2022")
align <- read_fasta('$sequences')
align_tb <- tibble(accession=names(align), sequence=align)
details <- read_csv('$metadata') %>%
rowwise() %>%
filter(!(strain %in% to_exclude)) %>%
filter(!(strain %in% to_exclude_strain)) %>%
filter(!(accession %in% to_exclude_accession)) %>%
mutate(est=is.na(date)) %>%
mutate(dateSubmitted=if_else(is.na(dateSubmitted), today(), dateSubmitted)) %>%
mutate(upper=dateSubmitted) %>%
......@@ -76,33 +78,33 @@ process estimate_ML_tree {
publishDir 'ml', mode: 'copy'
input:
path alignment_file from dated_sequences
path alignment_file
output:
path "${alignment_file}.treefile"
"""
iqtree2 -s $alignment_file
iqtree2 -s $alignment_file -m GTR+F+I --fast
"""
}
process run_BD_mcmc {
process BD_analysis {
publishDir 'results', mode: 'copy'
input:
tuple path('aligned_sequences_annotated.fasta'), path(special_ages) from annotated_sequences
path xmlFile from Channel.fromPath("XML/R0_BD_*.xml")
each infWeeks from Channel.from(2, 4)
each sampUB from Channel.from("0.1", "1")
each seed from 1..5
tuple path('aligned_sequences_annotated.fasta'), path(special_ages)
path xmlFile
each infWeeks
each sampUB
each seed
output:
path "${xmlFile.getBaseName()}.infWeeks${infWeeks}.sampUB${sampUB}.${seed}.trees"
tuple val("${xmlFile.getBaseName()}_sampUB$sampUB"),
val(infWeeks),
val(seed),
path("${xmlFile.getBaseName()}.infWeeks${infWeeks}.sampUB${sampUB}.${seed}.log") \
into bd_raw_output
path("${xmlFile.getBaseName()}.infWeeks${infWeeks}.sampUB${sampUB}.${seed}.log"),
emit: bd_raw_output
script:
buRate = 52.0/infWeeks
......@@ -117,14 +119,16 @@ process run_BD_mcmc {
"""
}
process run_Coal_mcmc {
publishDir 'results', mode: 'copy'
input:
tuple path('aligned_sequences_annotated.fasta'), path(special_ages) from annotated_sequences
path xmlFile from Channel.fromPath("XML/R0_Coal*.xml")
each infWeeks from Channel.from(2, 4)
each seed from 1..5
tuple path('aligned_sequences_annotated.fasta'), path(special_ages)
path xmlFile
each infWeeks
each seed
output:
path "${xmlFile.getBaseName()}.infWeeks${infWeeks}.${seed}.trees"
......@@ -132,7 +136,7 @@ process run_Coal_mcmc {
val(infWeeks),
val(seed),
path("${xmlFile.getBaseName()}.infWeeks${infWeeks}.${seed}.log") \
into coal_raw_output
emit: coal_raw_output
script:
buRate = 52.0/infWeeks
......@@ -146,115 +150,132 @@ process run_Coal_mcmc {
"""
}
process process_bd_traces {
input:
tuple val(methodName),
val(infWeeks),
val(seed),
path(trace) from bd_raw_output
output:
path "processed.log" into processed_BD_traces1, processed_BD_traces2
workflow {
annotate_sequences(seq_url, details_url)
estimate_ML_tree(annotate_sequences.out.dated_sequences)
"""
#!/usr/bin/env Rscript
BD_analysis(annotate_sequences.out.annotated_sequences,
Channel.fromPath("XML/R0_BD_*.xml"),
Channel.from(2,4),
Channel.from("0.1","1"),
1..5)
library(tidyverse)
read_tsv("$trace") %>% slice_tail(prop=0.9) %>%
transmute(clockRate, R0Value, tree.height, sampProp=sampValue.1) %>%
mutate(method="$methodName", infWeeks="$infWeeks", seed=$seed) %>%
pivot_longer(cols=-c(method,infWeeks)) %>%
write_csv("processed.log")
"""
Coal_analysis(annotate_sequences.out.annotated_sequences,
Channel.fromPath("XML/R0_Coal*.xml"),
Channel.from(2,4),
1..5)
}
process process_Coal_traces {
input:
tuple val(methodName),
val(infWeeks),
val(seed),
path(trace) from coal_raw_output
// process process_bd_traces {
// input:
// tuple val(methodName),
// val(infWeeks),
// val(seed),
// path(trace) from bd_raw_output
output:
path "processed.log" into processed_Coal_traces
// output:
// path "processed.log" into processed_BD_traces1, processed_BD_traces2
"""
#!/usr/bin/env Rscript
// """
// #!/usr/bin/env Rscript
library(tidyverse)
// library(tidyverse)
read_tsv("$trace") %>% slice_tail(prop=0.9) %>%
select(clockRate, R0Value, tree.height) %>%
mutate(method="$methodName", infWeeks="$infWeeks", seed=$seed) %>%
pivot_longer(cols=-c(method,infWeeks)) %>%
write_csv("processed.log")
"""
}
// read_tsv("$trace") %>% slice_tail(prop=0.9) %>%
// transmute(clockRate, R0Value, tree.height, sampProp=sampValue.1) %>%
// mutate(method="$methodName", infWeeks="$infWeeks", seed=$seed) %>%
// pivot_longer(cols=-c(method,infWeeks)) %>%
// write_csv("processed.log")
// """
// }
process make_sampProp_plot {
publishDir 'figures', mode: 'copy'
// process process_Coal_traces {
// input:
// tuple val(methodName),
// val(infWeeks),
// val(seed),
// path(trace) from coal_raw_output
input:
path concatenatedBD from processed_BD_traces1.collectFile(keepHeader: true, skip: 1)
// output:
// path "processed.log" into processed_Coal_traces
output:
path"compare_sampProp.png"
// """
// #!/usr/bin/env Rscript
"""
#!/usr/bin/env Rscript
// library(tidyverse)
library(tidyverse)
// read_tsv("$trace") %>% slice_tail(prop=0.9) %>%
// select(clockRate, R0Value, tree.height) %>%
// mutate(method="$methodName", infWeeks="$infWeeks", seed=$seed) %>%
// pivot_longer(cols=-c(method,infWeeks)) %>%
// write_csv("processed.log")
// """
// }
data <- read_csv("$concatenatedBD")
p <- ggplot(data %>% filter(name == "sampProp"), aes(x=value,col=method,fill=method)) +
geom_density(alpha=0.5) +
facet_grid(rows=vars(infWeeks), cols=vars(name), scales="free") +
scale_x_log10()
ggsave("compare_sampProp.png", p, width=15, height=15, units="cm")
"""
}
// process make_sampProp_plot {
// publishDir 'figures', mode: 'copy'
process make_plots {
publishDir 'figures', mode: 'copy'
// input:
// path concatenatedBD from processed_BD_traces1.collectFile(keepHeader: true, skip: 1)
input:
path concatenated from processed_BD_traces2
.mix(processed_Coal_traces)
.collectFile(keepHeader: true, skip: 1)
// output:
// path"compare_sampProp.png"
output:
path "compare_R0.png"
path "compare_clockRate.png"
path "compare_MRCA.png"
// """
// #!/usr/bin/env Rscript
"""
#!/usr/bin/env Rscript
// library(tidyverse)
library(tidyverse)
// data <- read_csv("$concatenatedBD")
data <- read_csv("$concatenated")
p <- ggplot(data %>% filter(name == "R0Value"), aes(x=value,col=method,fill=method)) +
geom_density(alpha=0.5) +
facet_grid(rows=vars(infWeeks), cols=vars(name), scales="free") +
geom_vline(xintercept=1, col="red") +
coord_cartesian(xlim=c(1,3))
ggsave("compare_R0.png", p, width=15, height=15, units="cm")
p <- ggplot(data %>% filter(name == "clockRate"), aes(x=value,col=method,fill=method)) +
geom_density(alpha=0.5) +
facet_grid(rows=vars(infWeeks), cols=vars(name), scales="free") +
xlim(c(1e-4,1e-3)) + scale_x_log10()
ggsave("compare_clockRate.png", p, width=15, height=15, units="cm")
p <- ggplot(data %>% filter(name == "tree.height"), aes(x=value,col=method,fill=method)) +
geom_density(alpha=0.5) +
facet_grid(rows=vars(infWeeks), cols=vars(name), scales="free")
ggsave("compare_MRCA.png", p, width=15, height=15, units="cm")
"""
}
// p <- ggplot(data %>% filter(name == "sampProp"), aes(x=value,col=method,fill=method)) +
// geom_density(alpha=0.5) +
// facet_grid(rows=vars(infWeeks), cols=vars(name), scales="free") +
// scale_x_log10()
// ggsave("compare_sampProp.png", p, width=15, height=15, units="cm")
// """
// }
// process make_plots {
// publishDir 'figures', mode: 'copy'
// input:
// path concatenated from processed_BD_traces2
// .mix(processed_Coal_traces)
// .collectFile(keepHeader: true, skip: 1)
// output:
// path "compare_R0.png"
// path "compare_clockRate.png"
// path "compare_MRCA.png"
// """
// #!/usr/bin/env Rscript
// library(tidyverse)
// data <- read_csv("$concatenated")
// p <- ggplot(data %>% filter(name == "R0Value"), aes(x=value,col=method,fill=method)) +
// geom_density(alpha=0.5) +
// facet_grid(rows=vars(infWeeks), cols=vars(name), scales="free") +
// geom_vline(xintercept=1, col="red") +
// coord_cartesian(xlim=c(1,3))
// ggsave("compare_R0.png", p, width=15, height=15, units="cm")
// p <- ggplot(data %>% filter(name == "clockRate"), aes(x=value,col=method,fill=method)) +
// geom_density(alpha=0.5) +
// facet_grid(rows=vars(infWeeks), cols=vars(name), scales="free") +
// xlim(c(1e-4,1e-3)) + scale_x_log10()
// ggsave("compare_clockRate.png", p, width=15, height=15, units="cm")
// p <- ggplot(data %>% filter(name == "tree.height"), aes(x=value,col=method,fill=method)) +
// geom_density(alpha=0.5) +
// facet_grid(rows=vars(infWeeks), cols=vars(name), scales="free")
// ggsave("compare_MRCA.png", p, width=15, height=15, units="cm")
// """
// }
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment