Last updated: 2018-08-20
workflowr checks: (Click a bullet for more information) ✖ R Markdown file: uncommitted changes
The R Markdown is untracked by Git. To know which version of the R Markdown file created these results, you’ll want to first commit it to the Git repo. If you’re still working on the analysis, you can ignore this warning. When you’re finished, you can run wflow_publish
to commit the R Markdown file and build the HTML.
✔ Environment: empty
Great job! The global environment was empty. Objects defined in the global environment can affect the analysis in your R Markdown file in unknown ways. For reproduciblity it’s best to always run the code in an empty environment.
✔ Seed:
set.seed(20180716)
The command set.seed(20180716)
was run prior to running the code in the R Markdown file. Setting a seed ensures that any results that rely on randomness, e.g. subsampling or permutations, are reproducible.
✔ Session information: recorded
Great job! Recording the operating system, R version, and package versions is critical for reproducibility.
✔ Repository version: f84ee9f
wflow_publish
or wflow_git_commit
). workflowr only checks the R Markdown file, but you know if there are other scripts or data files that it depends on. Below is the status of the Git repository when the results were generated:
Ignored files:
Ignored: .DS_Store
Ignored: .Rhistory
Ignored: .Rproj.user/
Ignored: analysis/.DS_Store
Ignored: code/.DS_Store
Ignored: data/.DS_Store
Ignored: data/mating_type_tables/
Ignored: data/sim_results/
Ignored: docs/.DS_Store
Untracked files:
Untracked: .drake/
Untracked: analysis/model_functions.Rmd
Untracked: analysis/plot_model.Rmd
Untracked: analysis/run_model.Rmd
Untracked: analysis/testing_functions.R
Untracked: data/all_results.rds
Untracked: file157c918d759b0.R
Untracked: manuscript/
Untracked: output/
Untracked: run.R
Untracked: sex_shredder.Rproj
Note that any generated files, e.g. HTML, png, CSS, etc., are not included in this status report because it is ok for generated content to have uncommitted changes.
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(purrr)
library(tidyr)
library(stringr)
library(reshape2)
Attaching package: 'reshape2'
The following object is masked from 'package:tidyr':
smiths
library(parallel)
library(Rcpp)
For every combination of male and female genotypes (i.e. mating types), we can calculate the types of zygotes that can be produced, as well as the relative frequency of each zygote type. It is computationally efficient to do this before running the simulation and store all the information in a big table, and then look it up later as needed. The types of zygotes produced depend on three parameters which are passed as arguments, namely:
make_mating_type_table <- function(W_shredding_rate,
Z_conversion_rate,
Zr_creation_rate){
# Step 1: write out the possible alleles at all 3 loci
# (the sex chrs, and the two autosomal resistance loci)
males_sex <- c("Z+Z+", "Z*Z*", "ZrZr", "Z*Z+", "Z*Zr", "Z+Zr")
females_sex <- c("Z+W+", "Z*W+", "ZrW+", "Z+Wr", "Z*Wr", "ZrWr")
autosomal_W_rescue <- c("aa", "Aa", "AA") # Big A is rescue allele for W-shredding
autosomal_Z_rescue <- c("bb", "Bb", "BB") # Big B is rescue allele for Z-conversion
# Step 2: Combine the 3 loci's alleles to find all possible male and female genotypes
get_possible_genotypes <- function(sex_chr){
expand.grid(sex_chr,
autosomal_W_rescue,
autosomal_Z_rescue,
stringsAsFactors = FALSE) %>%
apply(1, paste0, collapse = "")
}
male_genotypes <- get_possible_genotypes(sex_chr = males_sex)
female_genotypes <- get_possible_genotypes(sex_chr = females_sex)
# Step 3: Find all possible mating types
mating_types <- expand.grid(male_genotypes,
female_genotypes,
stringsAsFactors = FALSE) %>%
as.data.frame() %>%
mutate(male = Var1, female = Var2) %>%
select(female, male)
# Step 4: Find the types and frequencies of gametes produced by each mating type
## 4a: Identify mating types that experience W-shredding and/or Z-conversion
## Note that the A and B alleles are dominant - one copy confers protection
W.shredding <- grepl("Z[*]W[+]", mating_types$female) &
!(grepl("A", mating_types$female))
Z.conversion <- grepl("Z[*]Z[+]", mating_types$male) &
!(grepl("B", mating_types$male))
## 4b: Find the proportions of each type of sex chromosome in the gametes,
## allowing for W shredding and Z conversion
female_gametes_sex <- data.frame(type1 = substr(mating_types$female, 1, 2),
type2 = substr(mating_types$female, 3, 4),
prop1 = 0.5,
prop2 = 0.5,
stringsAsFactors = FALSE) %>%
mutate(prop1 = replace(prop1, W.shredding, 0.5 + (0.5 * W_shredding_rate)),
prop2 = 1 - prop1)
### NB: Z*Z+ males can create Zr gametes by NHEJ, hence 'type3' column
male_gametes_sex <- data.frame(type1 = substr(mating_types$male, 1, 2),
type2 = substr(mating_types$male, 3, 4),
type3 = "Zr",
prop1 = 0.5, prop2 = 0, prop3 = 0,
stringsAsFactors = FALSE) %>%
mutate(prop1 = replace(prop1, Z.conversion, 0.5 + 0.5 * Z_conversion_rate),
prop3 = replace(prop3, Z.conversion, (0.5 + 0.5 * Z_conversion_rate) * Zr_creation_rate),
prop1 = prop1 - prop3, # The new Zr chromosomes are taken out of those that undergo gene conversion
prop2 = 1 - prop1 - prop3) # The Z+ chromosomes escaping conversion to Z* or NHEJ to Zr
male_gametes_sex[male_gametes_sex$prop3 == 0,
names(male_gametes_sex) %in% c("type3", "prop3")] <- NA
## 4c: Find the proportions of each autosomal allele in the gametes
get_gametes_autosomes <- function(genotype_column){
lapply(strsplit(substr(genotype_column, 5, 8), split = ""), function(x){
c(paste(x[1], x[3], sep = ""), # NB this bit assumes A and B loci are unlinked
paste(x[1], x[4], sep = ""),
paste(x[2], x[3], sep = ""),
paste(x[2], x[4], sep = "")
)
}) %>% do.call("rbind", .)
}
female_gametes_autosomes <- get_gametes_autosomes(mating_types$female)
male_gametes_autosomes <- get_gametes_autosomes(mating_types$male)
## 4d: Combine sex and autosomal alleles to find complete
## gametic genotypes, and their corresponding frequencies
get_complete_gametes <- function(gametes_sex,
gametes_autosomes,
male){ # logical: are we doing males?
# Make vector showing if there are 1, 2 or 4 possible
# autosome genotypes in the gametes, for each of the mating types
possible.autosomes <- lapply(1:nrow(gametes_autosomes),
function(i) unique(gametes_autosomes[i, ]))
n_unique_autosomes <- sapply(possible.autosomes, length)
# Count if there are 2 or 3 possible sex chromosomes in the sperm (from NHEJ)
# Note that for simpler coding, I treat sex chr homozygotes as having 2 alleles
# (not 1) for this section (it is quickly fixed later by 'simplying')
sex <- rep(2, nrow(gametes_sex)) # always 2 for females, and for most males
if(male) sex[!is.na(gametes_sex$type3)] <- 3 # 3 sex chromosomes in Z*Z+ male sperm
# Paste the sex and autosomes together
paster <- function(i){ # i is the mating type index
sex <- sex[i] # 2 or 3 sex chrs in the focal sex for this mating type
possible.sex.chr <- as.character(gametes_sex[i, 1:sex]) # columns 1-2 or 1-3
sex.chr.freqs <- as.numeric(gametes_sex[i, 2 + as.numeric(male) + (1:sex)]) # cols 3-4 or 4-6
possible.autosomes <- possible.autosomes[[i]] # 1, 2 or 4 autosome genotypes
expand.grid(sex = possible.sex.chr, # combine and paste genotypes
auto = possible.autosomes,
stringsAsFactors = FALSE) %>%
mutate(genotype = map2_chr(sex, auto, paste0, collapse = "")) %>%
mutate(prop = rep(sex.chr.freqs * 1 / n_unique_autosomes[i], # find corresponding freqs
n_unique_autosomes[i])) %>%
group_by(genotype) %>%
summarise(prop = sum(prop))
}
lapply(1:nrow(gametes_sex), paster) # returns a list
} # end of get_complete_gametes()
complete_female_gametes <- get_complete_gametes(female_gametes_sex,
female_gametes_autosomes,
male = FALSE)
complete_male_gametes <- get_complete_gametes(male_gametes_sex,
male_gametes_autosomes,
male = TRUE)
# Step 5: find the zygote frequencies for each mating type
get_zygotes <- function(mating_types,
complete_female_gametes,
complete_male_gametes){
n_mating_types <- length(complete_female_gametes)
# Combine the gametes to make zygotes
zygotes <- lapply(1:n_mating_types, function(i){
combos <- expand.grid(complete_female_gametes[[i]]$genotype,
complete_male_gametes[[i]]$genotype,
stringsAsFactors = FALSE)
props <- expand.grid(complete_female_gametes[[i]]$prop,
complete_male_gametes[[i]]$prop)
combos$prop <- props[,1] * props[,2]
# Discard info on parental origin of alleles
for(j in 1:nrow(combos)) combos[j, 1:2] <- sort(combos[j, 1:2])
# Combine probabilities of identical genotypes
combos %>%
group_by(Var1, Var2) %>%
summarise(prop = sum(prop)) %>%
mutate(mating_type = i)
}) %>%
bind_rows() %>% as.data.frame() %>% mutate(genotype = NA)
for(i in 1:nrow(zygotes)){
A_geno <- c(substr(zygotes$Var1[i], 3, 3),
substr(zygotes$Var2[i], 3, 3)) %>%
sort(decreasing = TRUE)
B_geno <- c(substr(zygotes$Var1[i], 4, 4),
substr(zygotes$Var2[i], 4, 4)) %>%
sort(decreasing = TRUE)
sex_geno <- c(substr(zygotes$Var1[i], 1, 2),
substr(zygotes$Var2[i], 1, 2))
forwards <- paste0(sex_geno, collapse = "")
if(forwards %in% c(males_sex, females_sex)){ # I define Z*Z+ as a 'real' genotype but not Z+Z*
zygotes$genotype[i] <- paste0(c(forwards,
A_geno,
B_geno),
collapse = "")
} else { # if genotype is not real, paste it the other way around
zygotes$genotype[i] <- paste0(c(paste0(rev(sex_geno), collapse = ""),
A_geno,
B_geno),
collapse = "")
}
} # end of for loop
data.frame(mating_type = zygotes$mating_type,
mother = mating_types$female[zygotes$mating_type],
father = mating_types$male[zygotes$mating_type],
zygote = zygotes$genotype,
prop = zygotes$prop, stringsAsFactors = FALSE)
} # end of get_zygotes()
get_zygotes(mating_types, complete_female_gametes, complete_male_gametes)
}
# test <- make_mating_type_table(W_shredding_rate = 0.95,
# Z_conversion_rate = 0.95,
# Zr_creation_rate = 0.01)
This function supplements a mating_type_table with all the extra progeny classes that can appear by a mutation of one of the normal progeny classes. We assume that Z+ chromosomes mutate to Zr and vice versa, and W+ to Wr and vice versa. We assume that mutations are equally likely in both directions, meaning that mutation will not consistently affect the allele frequencies.
add_mutants <- function(mating_type_table, Zr_mutation_rate, Wr_mutation_rate){
zygote_has_two_Z <- grepl("Z[+]Z[+]", mating_type_table$zygote)
zygote_has_one_Z <- grepl("Z[+]", mating_type_table$zygote) & !zygote_has_two_Z
zygote_has_two_Zr <- grepl("ZrZr", mating_type_table$zygote)
zygote_has_one_Zr <- grepl("Zr", mating_type_table$zygote) & !zygote_has_two_Zr
zygote_has_W <- grepl("W[+]", mating_type_table$zygote)
zygote_has_Wr <- grepl("Wr", mating_type_table$zygote)
# Create the mutant zygotes...
Z_mutants1 <- mating_type_table %>% # single Z+ -> Zr mutants in Z+/- individuals
filter(zygote_has_one_Z) %>%
mutate(zygote = str_replace_all(zygote, "Z[+]", "Zr"),
prop = prop * Zr_mutation_rate)
Z_mutants2 <- mating_type_table %>% # single Z+ -> Zr mutants in Z+Z+ individuals
filter(zygote_has_two_Z) %>%
mutate(zygote = str_replace_all(zygote, "Z[+]Z[+]", "Z+Zr"),
prop = prop * Zr_mutation_rate * 2)
Z_mutants3 <- mating_type_table %>% # double Z+ -> Zr mutants in Z+Z+ individuals
filter(zygote_has_two_Z) %>%
mutate(zygote = str_replace_all(zygote, "Z[+]Z[+]", "ZrZr"),
prop = prop * Zr_mutation_rate * Zr_mutation_rate)
Zr_mutants1 <- mating_type_table %>% # single Zr -> Z+ mutants in Zr/- individuals
filter(zygote_has_one_Zr) %>%
mutate(zygote = str_replace_all(zygote, "Zr", "Z+"),
prop = prop * Zr_mutation_rate)
Zr_mutants2 <- mating_type_table %>% # single Zr -> Z+ mutants in ZrZr individuals
filter(zygote_has_two_Zr) %>%
mutate(zygote = str_replace_all(zygote, "ZrZr", "Z+Zr"),
prop = prop * Zr_mutation_rate * 2)
Zr_mutants3 <- mating_type_table %>% # double Zr -> Z+ mutants in ZrZr individuals
filter(zygote_has_two_Zr) %>%
mutate(zygote = str_replace_all(zygote, "ZrZr", "Z+Z+"),
prop = prop * Zr_mutation_rate * Zr_mutation_rate)
W_mutants <- mating_type_table %>%
filter(zygote_has_W) %>%
mutate(zygote = str_replace_all(zygote, "W[+]", "Wr"),
prop = prop * Wr_mutation_rate)
Wr_mutants <- mating_type_table %>%
filter(zygote_has_Wr) %>%
mutate(zygote = str_replace_all(zygote, "Wr", "W+"),
prop = prop * Wr_mutation_rate)
# Subtract the mutants from the non-mutants' frequencies
mating_type_table$prop[zygote_has_one_Z] <-
mating_type_table$prop[zygote_has_one_Z] - Z_mutants1$prop
mating_type_table$prop[zygote_has_two_Z] <-
mating_type_table$prop[zygote_has_two_Z] - Z_mutants2$prop - Z_mutants3$prop
mating_type_table$prop[zygote_has_one_Zr] <-
mating_type_table$prop[zygote_has_one_Zr] - Zr_mutants1$prop
mating_type_table$prop[zygote_has_two_Zr] <-
mating_type_table$prop[zygote_has_two_Zr] - Zr_mutants2$prop - Zr_mutants3$prop
mating_type_table$prop[zygote_has_W] <-
mating_type_table$prop[zygote_has_W] - W_mutants$prop
mating_type_table$prop[zygote_has_Wr] <-
mating_type_table$prop[zygote_has_Wr] - Wr_mutants$prop
rbind(mating_type_table, # bind mutants onto the mating type table, combine and simplify
Z_mutants1, Z_mutants2, Z_mutants3,
Zr_mutants1, Zr_mutants2, Zr_mutants3,
W_mutants, Wr_mutants) %>%
group_by(mating_type, mother, father, zygote) %>%
summarise(prop = sum(prop)) %>%
as.data.frame() %>%
arrange(mating_type, mother, father, -prop) %>%
mutate(mating_type = as.character(mating_type),
mother = as.character(mother),
father = as.character(father))
}
# Helper function to convert the zygote frequencies to a cumulative sum.
# To create each offspring, we later roll random numbers between 0 and 1 with runif(),
# then see which is the largest index of 'prop' that is less than the random number.
# This will efficiently determine which zygote is produced, allowing us to roll all the
# random numbers needed in one step only (making use of vectorisation for speed)
convert_probabilities <- function(mating_type_table){
# Remove zygote classes with zero probability (e.g. because drive is 100% and Z+ or W+ are missing)
mating_type_table <- mating_type_table %>% filter(prop != 0)
# Arrange probs from biggest to smallest within each mating type
mating_type_table <- mating_type_table %>%
split(mating_type_table$mating_type) %>%
map(~ .x %>% arrange(-prop)) %>%
bind_rows()
# convert to cumsum probabilities; for example,
# The classic 1:2:1 Mendelian ratio has genotype frequencies of .25, .5, and .25
# This is represented as c(0, 0.25, 0.75).
# Rolling runif() 0 to 0.25 yields geno 1, >.25 gives genotype 2, and >.75 gives genotype 3
mating_type_table$prop <- mating_type_table$prop %>%
split(mating_type_table$mating_type) %>% # Get the vector of zygote probs for each mating type
map(function(focal_props){
focal_props <- cumsum(focal_props)
c(0, focal_props[-length(focal_props)])
}) %>% combine()
# Remove numbers so close 1 they are converted to 1 by floating point arithmetic
# Effectively, we assume that sufficiently rare progeny classes never appear (<10^-16 I think)
mating_type_table %>% filter(prop != 1)
}
We assume that individuals with wild type sex chromosomes (Z+ and W+), as well as non-resistant autosomal alleles (genotype aabb), have a fitness of 1. The resistant alleles (Zr, Wr, A, and B), as well as the driving Z* allele, all potentially incur costs. These costs are multiplicative, such that the fitness cost \(c\) of e.g. having both Zr and A is \(c = 1 {\times} (1 - c_{Zr}) {\times} (1 - c_{A})\). The costs are also all dominant: having one copy of Z*, Zr, Wr, A, and B is equally costly as having two.
make_fitness_table <- function(cost_Zdrive_female,
cost_Zdrive_male,
cost_Wr,
cost_Zr,
cost_A,
cost_B,
mating_type_table){
female_fitnesses <- mating_type_table %>%
select(mother) %>% distinct() %>%
rename(genotype = mother) %>%
mutate(w = 1,
w = replace(w, grepl("Wr", genotype), (1 - cost_Wr)))
female_fitnesses$w[grep("Z[*]", female_fitnesses$genotype)] <-
female_fitnesses$w[grep("Z[*]", female_fitnesses$genotype)] * (1 - cost_Zdrive_female)
male_fitnesses <- mating_type_table %>%
select(father) %>% distinct() %>%
rename(genotype = father) %>%
mutate(w = 1,
w = replace(w, grepl("Z[*]", genotype), (1 - cost_Zdrive_male)))
fitness <- rbind(female_fitnesses, male_fitnesses)
fitness$w[grep("Zr", fitness$genotype)] <-
fitness$w[grep("Zr", fitness$genotype)] * (1 - cost_Zr)
fitness$w[grep("A", fitness$genotype)] <-
fitness$w[grep("A", fitness$genotype)] * (1 - cost_A)
fitness$w[grep("B", fitness$genotype)] <-
fitness$w[grep("B", fitness$genotype)] * (1 - cost_B)
fitness %>% rename(fitness = w)
}
This function creates a population with a specified size, spread evenly across a specified number of patches, with specified allele frequencies in Hardy-Weinberg genotype frequencies.
make_initial_population <- function(initial_Zdrive,
initial_Zr,
initial_Wr,
initial_A,
initial_B,
initial_pop_size,
n_patches,
fitness_table){
initial_freqs <- fitness_table %>%
rename(freq = fitness) %>%
mutate(sex = substr(genotype, 1, 4),
auto1 = substr(genotype, 5, 6),
auto2 = substr(genotype, 7, 8),
freq = 0)
Z <- (1 - initial_Zdrive - initial_Zr)
Zdrive <- initial_Zdrive
Zr <- initial_Zr
W <- (1 - initial_Wr)
Wr <- initial_Wr
initial_freqs$sex[initial_freqs$sex == "Z+Z+"] <- Z ^ 2
initial_freqs$sex[initial_freqs$sex == "Z*Z*"] <- Zdrive ^ 2
initial_freqs$sex[initial_freqs$sex == "ZrZr"] <- Zr ^ 2
initial_freqs$sex[initial_freqs$sex == "Z*Z+"] <- 2 * Zdrive * Z
initial_freqs$sex[initial_freqs$sex == "Z+Zr"] <- 2 * Z * Zr
initial_freqs$sex[initial_freqs$sex == "Z*Zr"] <- 2 * Zdrive * Zr
initial_freqs$sex[initial_freqs$sex == "Z+W+"] <- Z * W
initial_freqs$sex[initial_freqs$sex == "Z*W+"] <- Zdrive * W
initial_freqs$sex[initial_freqs$sex == "ZrW+"] <- Zr * W
initial_freqs$sex[initial_freqs$sex == "Z+Wr"] <- Z * Wr
initial_freqs$sex[initial_freqs$sex == "Z*Wr"] <- Zdrive * Wr
initial_freqs$sex[initial_freqs$sex == "ZrWr"] <- Zr * Wr
initial_freqs[initial_freqs == "aa"] <- (1 - initial_A) ^ 2
initial_freqs[initial_freqs == "Aa"] <- 2 * initial_A * (1 - initial_A)
initial_freqs[initial_freqs == "AA"] <- initial_A ^ 2
initial_freqs[initial_freqs == "bb"] <- (1 - initial_B) ^ 2
initial_freqs[initial_freqs == "Bb"] <- 2 * initial_B * (1 - initial_B)
initial_freqs[initial_freqs == "BB"] <- initial_B ^ 2
for(i in 2:ncol(initial_freqs)) initial_freqs[, i] <- as.numeric(initial_freqs[, i])
initial_freqs$freq <- with(initial_freqs, sex * auto1 * auto2)
data.frame(genotype = sample(initial_freqs$genotype,
initial_pop_size,
prob = initial_freqs$freq,
replace = TRUE),
patch = sample(n_patches, initial_pop_size, replace = TRUE),
stringsAsFactors = FALSE) %>%
group_by(patch, genotype) %>%
summarise(n = n()) %>% arrange(patch, -n) %>%
ungroup()
}
# test_pop <- make_initial_population(0.1, 0.1, 0.1, 0.1, 0.1, initial_pop_size = 1000, n_patches = 10, fitness_table = make_fitness_table())
This functions adds release_size
Z*Z* males to the population, simulating a release of the engineered Z chromosome. We assume the release is localised, such that patch 1 gets most of it, then patch 2, and so on exponentially. For \(k\) patches, the proportion of the release to patch \(i\) is given by \(p_i = exp(i)^{-1} / (\sum_{i=1}^{k}exp(i)^{-1}/k)\).
release_Zd_individuals <- function(parent_list, n_patches, fitness_table, release_size, release_strategy){
if(release_strategy == "one_patch"){
# Release drive males in most productive patch
patch_productivities <- parent_list[[1]] %>%
group_by(patch) %>%
summarise(productivity = sum(fecundity))
released_males <- data.frame(
patch = with(patch_productivities, patch[which.max(productivity)]),
genotype = "Z*Z*aabb",
n = release_size,
fitness = fitness_table$fitness[fitness_table$genotype == "Z*Z*aabb"],
stringsAsFactors = FALSE)
}
# Scatter the release randomly and evenly across all patches
if(release_strategy == "all_patches"){
released_males <- data.frame(
patch = 1:n_patches,
genotype = "Z*Z*aabb",
n = c(rmultinom(1, release_size, rep(1 / n_patches, n_patches))),
fitness = fitness_table$fitness[fitness_table$genotype == "Z*Z*aabb"],
stringsAsFactors = FALSE)
}
list(
parent_list[[1]], # females
rbind(parent_list[[2]] %>% select(-is_female) %>% as.data.frame(),
released_males) %>% # original+released males
group_by(patch, genotype, fitness) %>%
summarise(n = sum(n)) %>%
ungroup() %>% as.data.frame()
)
}
This function is run once per generation in the simulation. It randomly picks the male and female parents of each new offspring, where parents with high fitness genotypes (e.g. Z+Z+aabb) are more likely to be picked than parents with low fitness genotypes (e.g. Z*Z*AABB). We assume a promiscuous mating system, such that individuals of both sexes potentially produce offspring with multiple partners. The fitness of each individual is stochastic, so each generation there will be some males and females that do not breed, and others that reproduce a lot.
We assume that all mating occurs within patches. Males thus always compete locally, meaning that alleles in males always experience “soft” selection (resulting from differences in lifetime mating success). For females, we model global and local competition separately (corresponding to hard and soft selection respectively). Selection on females results from differences in the lifetime number of offspring produced (this could be fecundity and/or viability selection).
We assume that under ideal conditions (e.g. in a near-empty patch or meta-population), a female with genotypic fitness \(w_j\) has an expected fecundity of \(w_j * max_fecundity\), while under crowded conditions, each females’ expected fecundity tends towards \(0\).
Under soft selection, a female in a PATCH containing \(n_f\) females has an expected fecundity of \(w_j * max_fecundity / (n_patches * ∑_i n_f w_i)\) offspring.
Under hard selection, a female in a METAPOPULATION containing \(n_f\) females has an expected fecundity of \(w_j * max_fecundity / (∑_i n_f w_i)\) offspring.
The actual number of progeny per female is generated stochastically by drawing from a Poisson distribution.
# Helper function
# given a vector of pre-rolled uniform random numbers (rand), a vector of cumulative sum probabilities,
# and a vector of the genotypes to which probability corresponds, pick the genotypes
# random_picker <- function(rand, probabilities, genotypes){
# sapply(1:length(rand), function(i) genotypes[sum(rand[i] > probabilities)])
# }
cppFunction('CharacterVector random_picker(
NumericVector rand,
NumericVector probabilities,
CharacterVector genotypes) {
int nRand = rand.size();
int nGenotypes = genotypes.size();
CharacterVector out(nRand);
/* First check if there is only one genotype: no random picking needed */
if(nGenotypes == 1){
for(int i = 0; i < nRand; ++i) {
out[i] = genotypes[0];
}
return out;
}
int stoppingPoint = nGenotypes - 1;
LogicalVector booleans(nGenotypes);
booleans[0] = true;
int picked = 0;
bool keepGoing = true;
/* Loop over the random numbers in rand */
for(int i = 0; i < nRand; ++i) {
/* Find which elements of probabilities are < rand[i] */
for(int j = 1; j < nGenotypes; ++j) {
booleans[j] = rand[i] > probabilities[j];
}
picked = 0;
keepGoing = true;
/* Loop over the booleans, and find the last element that is true */
while(keepGoing == true) {
if(booleans[picked + 1] == true) {
picked += 1;
if(picked == stoppingPoint) keepGoing = false;
} else keepGoing = false;
}
out[i] = genotypes[picked];
}
return out;
}')
# #
# x <- runif(100)
# random_picker(x, c(0, 0.5, 0.9, 0.95), c("A", "B", "C", "D"))
# random_picker_R(x, c(0, 0.5, 0.9, 0.95), c("A", "B", "C", "D"))
# microbenchmark::microbenchmark(C = random_picker(x, c(0, 0.5, 0.9, 0.95), c("A", "B", "C", "D")),
# R = random_picker_R(x, c(0, 0.5, 0.9, 0.95), c("A", "B", "C", "D")), times = 10000)
pick_mothers <- function(pop, fitness_table, n_patches,
softness,
male_weighting,
max_fecundity,
carrying_capacity,
density_dependence_shape){
pop <- pop %>%
# filter(n != 0) %>% # double check there are no empty genotypes....
left_join(fitness_table, by = "genotype") %>% # add fitness column
mutate(is_female = grepl("W", genotype)) # Add column of TRUE/FALSE for sex
# Make df of the male and female numbers (needed later as well as immediately)
patch_densities <- pop %>%
group_by(patch) %>%
summarise(n_females = sum(n[is_female]),
n_males = sum(n[!is_female]))
if(nrow(patch_densities) == 0) return(NULL) # Return NULL if pop has gone extinct
# Calculate global density, unless selection is 100% soft
# Note that all-male and all-female patches still contribute to the global density
if(softness != 1) global_density <- sum(pop$n[pop$is_female]) +
sum(pop$n[!pop$is_female]) * male_weighting
# here are the parents eligible to breed (no single-sex patches)
pop <- pop %>% filter(patch %in% patch_densities$patch)
females <- pop %>% filter(is_female)
males <- pop %>% filter(!is_female)
# under totally hard selection, we can skip calculation of the patch-specific densities
# Calculate (global) density-dependent fecundity accrording to Richards model (Fowler 1981):
# Females of genotype i have expected fecundity of w_i * max_fecundity * (Richards density regulation)
if(softness == 0){
females$density <- 1 - (global_density / carrying_capacity) ^ density_dependence_shape
} else { # under soft selection, we also need to calculate the patch-specific densities
# Patch-specific density is calculated as:
# softness * n_patches * (patch_n_females + patch_n_males * male_weighting)
if(softness == 1){
females <- females %>%
left_join(patch_densities %>% # softness=1, only local density matters
mutate(density = n_patches * (n_females + male_weighting * n_males)) %>%
select(patch, density),
by = "patch")
} else { # for 0 < softness < 1, we need both local and global density
females <- females %>%
left_join(patch_densities %>%
mutate(density = softness * n_patches *
(n_females + male_weighting * n_males) +
(1 - softness) * global_density) %>%
select(patch, density),
by = "patch")
}
}
# Randomly generate fecundity from the expected fecundities of each genotype/patch combination
# So, 10 ZWaabb females with fecundity 2.5 would have rpois(1, 25) offspring
females$fecundity <-
rpois(nrow(females), with(females,
n * fitness * max_fecundity * (1 - (density / carrying_capacity) ^ density_dependence_shape)))
print(females)
# Number of offspring born, by mother genotype and patch:
females <- females %>%
select(patch, genotype, fecundity)
# This is `parent_list` for the next function
return(list(females, males))
}
pick_fathers <- function(parent_list){
if(is.null(parent_list)) return(NULL) # Return NULL if pop has gone extinct
offspring_per_patch <- parent_list[[1]] %>%
group_by(patch) %>%
summarise(offspring = sum(fecundity)) %>%
filter(offspring != 0)
# Generate total_offspring random numbers to determine each offspring's father,
# And split these numbers by patch
rand <- runif(sum(offspring_per_patch$offspring)) %>%
split(rep(offspring_per_patch$patch,
times = offspring_per_patch$offspring))
# Now loop over all the patches with >0 offspring, and find fathers
# for individual offspring (therefore, we assume promiscuity)
lapply(1:nrow(offspring_per_patch), function(i){
# get the numbers of each male and female genotype in the focal patch
females_in_patch <- parent_list[[1]] %>% filter(patch == offspring_per_patch$patch[i])
males_in_patch <- parent_list[[2]] %>% filter(patch == offspring_per_patch$patch[i])
# Find the siring probability of each male genotype, convert to cumsum
father_probabilities <- males_in_patch$n * males_in_patch$fitness
father_probabilities <- cumsum(father_probabilities / sum(father_probabilities))
data.frame(patch = offspring_per_patch$patch[i],
parents = paste(rep(females_in_patch$genotype,
times = females_in_patch$fecundity),
random_picker(rand[[i]],
c(0, father_probabilities[-length(father_probabilities)]),
males_in_patch$genotype)),
stringsAsFactors = FALSE)
}) %>% do.call("rbind", .) %>%
group_by(patch, parents) %>%
summarise(n = n()) %>% as.data.frame()
}
Most of the hard work has already been done, when we created the mating_type_table above. This function takes a data frame of parents (giving the number of offspring produced by each different mating type in each patch), looks up the expected zygote frequencies in mating_type_table
, and then randomly generates the zygotes.
make_offspring <- function(parents, mating_type_table){
if(is.null(parents) || sum(parents$n) == 0) return(NULL) # Return NULL if pop has gone extinct
# Number of offspring from each mating type, summed across patches
offspring_per_mating_type <- parents %>%
group_by(parents) %>%
summarise(n = sum(n))
# Restrict the big mating_type_table, for faster searching
mating_type_table <- mating_type_table %>%
filter(parents %in% offspring_per_mating_type$parents)
# Generate a random number for every offspring, and split between each of the i mating types
rand <- runif(sum(parents$n)) %>%
split(rep(offspring_per_mating_type$parents,
times = offspring_per_mating_type$n))
# Loop over all the mating types
lapply(1:nrow(offspring_per_mating_type), function(i) {
focal.parents <- parents %>% # Get the patch ids for each offspring (same as parents')
filter(parents == offspring_per_mating_type$parents[i])
focal.zygotes <- mating_type_table %>% # Get the zygote proportions for the focal mating type
filter(parents == offspring_per_mating_type$parents[i])
data.frame(genotype = random_picker(rand[[i]],
focal.zygotes$prop,
focal.zygotes$zygote),
patch = rep(focal.parents$patch,
times = focal.parents$n),
stringsAsFactors = FALSE) %>%
group_by(patch, genotype) %>%
summarise(n = n())
}) %>% bind_rows() %>%
group_by(patch, genotype) %>%
summarise(n = sum(n)) %>%
ungroup()
}
This function moves newly-born indiviuals to a different randomly-selected patch. Males and females potentially migrate at different rates.
migrate_population <- function(pop,
n_patches,
patch_landing_probabilities, # passed as arg to save time
male_migration_prob,
female_migration_prob,
migration_type){
if(is.null(pop)) return(NULL) # Return NULL if pop has gone extinct
if(n_patches == 1) return(pop) # don't do anything if there is only one patch
nrow_pop <- nrow(pop)
migration_probs <- rep(male_migration_prob, nrow_pop)
migration_probs[grep("W", pop$genotype)] <- female_migration_prob
# For efficiency, the call to rmultinom() below allows migrants
# to return to their home patch, which would mean they are not really migrants!
# To make sure that male_migration_prob and female_migration_prob correctly
# give the % individuals that move to another patch, the function
# internally boosts the migration rates by a constant, c, which varies
# based on how many patches there are.
# To understand how I derived c, here's the algebra.
# The problem is that the following inequality is always true if there is a finite number of patches
# real_migration_rate < stated_migration_rate * (1 - proportion.returning.to.same.patch)
# To make real_migration_rate = stated_migration_rate, we can add a constant c,
# And solve for c when real_migration_rate = stated_migration_rate:
# real_migration_rate = (stated_migration_rate + c) *
# (1 - proportion.returning.to.same.patch)
# Rearranging, we find that c is:
# c = (stated_migration_rate / (1 - fraction.returning)) - stated_migration_rate
if(migration_type == "global") {
migration_probs <- migration_probs + # Add on c. Not needed for local dispersal.
(migration_probs / (1 - patch_landing_probabilities[1])) - migration_probs
}
n_migrants <- rbinom(nrow_pop, pop$n, prob = migration_probs)
if(sum(n_migrants) == 0) return(pop) # return pop if there are no migrants
pop$n <- pop$n - n_migrants # remove migrants from their original patches
migrants <- pop %>% # Find the number of migrants for each genotype
mutate(n = n_migrants) %>%
filter(n != 0)
if(migration_type == "global"){
migrants <- migrants %>% # tally the migrants' genotypes
group_by(genotype) %>%
summarise(n = sum(n))
pop <- rbind(pop, # Find new patches for the migrants, and return them to the pop
lapply(1:nrow(migrants), function(i){ # for each genotype in the migrant pool...
data.frame(patch = 1:n_patches,
genotype = migrants$genotype[i],
n = rmultinom(1,
size = migrants$n[i],
prob = patch_landing_probabilities)[,1],
stringsAsFactors = FALSE)
}) %>% bind_rows() %>% filter(n != 0))
} else { # else for LOCAL migration...
pop <- rbind(
pop,
data.frame(patch = rep(migrants$patch, migrants$n),
genotype = rep(migrants$genotype, migrants$n),
n = 1,
stringsAsFactors = FALSE) %>%
mutate(patch = patch + sample(c(-1, 1), n(), replace = TRUE), # add -1 or +1 to patch address
patch = replace(patch, patch == 0, n_patches), # going to patch 0 brings you to patch k
patch = replace(patch, patch > n_patches, 1))) # going to patch k+1 brings you to patch 1
}
pop %>%
group_by(patch, genotype) %>%
summarise(n = sum(n)) %>%
ungroup()
}
get_allele_freqs <- function(pop, generation){
pop_size <- sum(pop$n)
n_alleles <- 2 * pop_size
freqs <- data.frame(
Z = pop$n * str_count(pop$genotype, "Z[+]") / n_alleles,
Zd = pop$n * str_count(pop$genotype, "Z[*]") / n_alleles,
Zr = pop$n * str_count(pop$genotype, "Zr") / n_alleles,
W = pop$n * str_count(pop$genotype, "W[+]") / n_alleles,
Wr = pop$n * str_count(pop$genotype, "Wr") / n_alleles,
A = pop$n * str_count(pop$genotype, "A") / n_alleles,
B = pop$n * str_count(pop$genotype, "B") / n_alleles,
females = 0, # placeholder
N = 0
) %>% summarise_all(sum) %>% gather(allele, frequency)
freqs$frequency[8] <- (freqs$frequency[4] + freqs$frequency[5]) * 2 # females
freqs$frequency[9] <- pop_size # pop size
freqs$frequency[1:3] <- freqs$frequency[1:3] / sum(freqs$frequency[1:3]) # Z alleles
freqs$frequency[4:5] <- freqs$frequency[4:5] / sum(freqs$frequency[4:5]) # W alleles
freqs %>% mutate(frequency = round(frequency, 3), # round to save disk space
generation = generation)
}
If running the simulation on many parameter spaces, we will first need to set up several mating_type_tables, each of which takes several seconds of compute time. To speed this up, this function identifies all the unique mating_type_tables that are implied by the table parameters
, and generates them all in parallel using mclapply().
set_up_mating_type_tables <- function(parameters, cores, overwrite = FALSE){
path_to_tables <- "data/mating_type_tables/"
if(overwrite) unlink(list.files(path_to_tables, full.names = TRUE))
parameters <- parameters %>%
mutate(mating_table = paste(W_shredding_rate,
Z_conversion_rate,
Zr_creation_rate,
Zr_mutation_rate,
Wr_mutation_rate, sep = "_"))
uniques <- unique(parameters$mating_table)
file.names <- paste(uniques, ".rds", sep = "")
uniques <- uniques[!(gsub(path_to_tables, "", file.names) %in% list.files(path_to_tables))]
if(length(uniques) > 0){
print("Setting up the mating type tables...")
file.names <- paste(path_to_tables, file.names[!(file.names %in% list.files(path_to_tables))], sep = "")
uniques <- lapply(strsplit(uniques, split = "_"), as.numeric)
mclapply(1:length(uniques), function(i){
make_mating_type_table(W_shredding_rate = uniques[[i]][1],
Z_conversion_rate = uniques[[i]][2],
Zr_creation_rate = uniques[[i]][3]) %>%
add_mutants(Zr_mutation_rate = uniques[[i]][4],
Wr_mutation_rate = uniques[[i]][5]) %>%
convert_probabilities() %>%
mutate(parents = paste(mother, father)) %>%
select(mother, father, parents, zygote, prop) %>%
saveRDS(file = file.names[i])
return(NULL)
}, mc.cores = cores)
}
uniques <- unique(parameters$mating_table)
mating_tables <- lapply(paste(path_to_tables, uniques, ".rds", sep = ""), readRDS)
parameters$mating_table <- (1:length(uniques))[match(parameters$mating_table, uniques)]
return(list(parameters, mating_tables))
}
This function runs the simulation, for a single parameter space specified by parameters[row, ]
, using a pre-made mating_type_table.
run_simulation <- function(row,
parameters,
mating_type_table){
print(row)
output.file <- sample(99999999999999, 1)
set.seed(as.integer(substr(output.file, 1, 5))) # Note that the file name is also the seed
output.file <- paste("data/sim_results/", output.file, ".rds", sep = "")
# Keep only the correct mating table from the list of tables made by set_up_mating_type_tables()
mating_type_table <- mating_type_table[[parameters$mating_table[row]]]
# Make a table holding the quality/fitness of each genotype
fitness_table <- make_fitness_table(
cost_Zdrive_female = parameters$cost_Zdrive_female[row],
cost_Zdrive_male = parameters$cost_Zdrive_male[row],
cost_Wr = parameters$cost_Wr[row],
cost_Zr = parameters$cost_Zr[row],
cost_A = parameters$cost_A[row],
cost_B = parameters$cost_B[row],
mating_type_table = mating_type_table)
# Make the initial population
pop <- make_initial_population(
initial_Zdrive = parameters$initial_Zdrive[row],
initial_Zr = parameters$initial_Zr[row],
initial_Wr = parameters$initial_Wr[row],
initial_A = parameters$initial_A[row],
initial_B = parameters$initial_B[row],
initial_pop_size = parameters$initial_pop_size[row],
n_patches = parameters$n_patches[row],
fitness_table = fitness_table)
# Assign remaining parameters to variables for faster access in the while() loops
release_size <- parameters$release_size[row]
burn_in <- parameters$burn_in[row]
softness <- parameters$softness[row]
male_weighting <- parameters$male_weighting[row]
carrying_capacity <- parameters$carrying_capacity[row]
density_dependence_shape <- parameters$density_dependence_shape[row]
male_migration_prob <- parameters$male_migration_prob[row]
female_migration_prob <- parameters$female_migration_prob[row]
migration_type <- parameters$migration_type[row]
n_patches <- parameters$n_patches[row]
max_fecundity <- parameters$max_fecundity[row]
patch_landing_probabilities <- rep(1/parameters$n_patches[row],
parameters$n_patches[row])
allele_freqs <- vector(burn_in + parameters$generations[row], mode = "list")
i <- 0L
generation_extinct <- NA # Declare outputs
generation_Zd_extinct <- NA
generation_W_extinct <- NA # wild-type W, aka W+
generation_Zd_fixed <- NA #If shredding rate is less than 100%, pop can sometimes persist with the Z*
outcome <- "Timer expired"
final_pop <- "placeholder"
# Do a few generations of burn-in, to allow population size and distribution to stabilise
while(i < burn_in){
i <- i + 1
# Pick the parents, make the offspring, migrate them, then repeat
pop <- pick_mothers(pop = pop,
fitness_table = fitness_table,
n_patches = n_patches,
softness = softness,
male_weighting = male_weighting,
max_fecundity = max_fecundity,
carrying_capacity = carrying_capacity,
density_dependence_shape = density_dependence_shape) %>%
pick_fathers() %>%
make_offspring(mating_type_table = mating_type_table) %>%
migrate_population(n_patches = n_patches,
patch_landing_probabilities = patch_landing_probabilities,
male_migration_prob = male_migration_prob,
female_migration_prob = female_migration_prob,
migration_type = migration_type)
# 1. Check if the pop has gone extinct,
if(is.null(pop)){
generation_extinct <- NA
outcome <- "Didn't survive burn-in"
final_pop <- NA
i <- Inf
} else allele_freqs[[i]] <- get_allele_freqs(pop, i) # record allele freqs
print(allele_freqs[[i]])
}
# If pop survived burn-in, add Zd individuals and begin iterating over generations
if(!is.null(pop)){
i <- 0L
# Add the Zd individuals, AFTER calculating density-dependent fecundity (or pop may crash due to the released individuals breaking the carrying capacity)
pop <- pick_mothers(pop = pop,
fitness_table = fitness_table,
n_patches = n_patches,
softness = softness,
male_weighting = male_weighting,
max_fecundity = max_fecundity,
carrying_capacity = carrying_capacity,
density_dependence_shape = density_dependence_shape) %>%
release_Zd_individuals(n_patches = n_patches,
fitness_table = fitness_table,
release_size = release_size,
release_strategy = parameters$release_strategy[row]) %>%
pick_fathers() %>%
make_offspring(mating_type_table = mating_type_table) %>%
migrate_population(n_patches = n_patches,
patch_landing_probabilities = patch_landing_probabilities,
male_migration_prob = male_migration_prob,
female_migration_prob = female_migration_prob,
migration_type = migration_type)
} else i <- Inf # otherwise, skip the iterations and go to the end
zero_one <- c(0,1) # Declare outside the loop
Zd_fixed_counter <- 0
while(i < parameters$generations[row]){
i <- i + 1
# Pick the parents, make the offspring, migrate them, then repeat
pop <- pick_mothers(pop = pop,
fitness_table = fitness_table,
n_patches = n_patches,
softness = softness,
male_weighting = male_weighting,
max_fecundity = max_fecundity,
carrying_capacity = carrying_capacity,
density_dependence_shape = density_dependence_shape) %>%
pick_fathers() %>%
make_offspring(mating_type_table = mating_type_table) %>%
migrate_population(n_patches = n_patches,
patch_landing_probabilities = patch_landing_probabilities,
male_migration_prob = male_migration_prob,
female_migration_prob = female_migration_prob,
migration_type = migration_type)
# 1. Check if the pop has gone extinct,
if(is.null(pop)){
generation_extinct <- i
outcome <- "Population extinct"
final_pop <- NA
i <- Inf
} else {
# 2. Check if females have gone extinct (also mark as extinct)
if (!any(str_detect(pop$genotype, "W"))) {
generation_extinct <- i
outcome <- "Population extinct"
final_pop <- NA
i <- Inf
} else {
# 3. record the allele frequencies
i_burnin <- i + burn_in
allele_freqs[[i_burnin]] <- get_allele_freqs(pop, i_burnin)
print(allele_freqs[[i_burnin]])
# 4. check if driving Z has gone extinct or reached fixation,
# or if wild-type W has been completely replaced by Wr
if (allele_freqs[[i_burnin]]$frequency[2] %in% zero_one |
allele_freqs[[i_burnin]]$frequency[4] == 0) {
if (allele_freqs[[i_burnin]]$frequency[2] == 0){
generation_Zd_extinct <- i_burnin
outcome <- "Zd extinct"
final_pop <- NA
i <- Inf
} else if (allele_freqs[[i_burnin]]$frequency[2] == 1) {
if(Zd_fixed_counter == 0) generation_Zd_fixed <- i
Zd_fixed_counter <- Zd_fixed_counter + 1
if(Zd_fixed_counter == 20) { # If Zd has been fixed for 20 generations without causing a crash...
outcome <- "Zd fixed without extinction"
final_pop <- NA
i <- Inf
}
} else if (allele_freqs[[i_burnin]]$frequency[4] == 0) {
generation_W_extinct <- i_burnin
outcome <- "Wr fixed"
final_pop <- NA
i <- Inf
}
}
}
} # End of "If not extinct..."
} # End of while()
if(!is.na(final_pop)) final_pop <- pop
print(outcome)
list( # return a list:
parameters[row, ] %>% # [[1]]: A 1-row dataframe holding the parameter space and outcome
cbind(data.frame(generation_extinct,
generation_Zd_extinct,
generation_W_extinct,
generation_Zd_fixed,
outcome,
stringsAsFactors = FALSE)),
allele_freqs %>% # [[2]]: The allele frequencies, for each generation
bind_rows() %>%
select(generation, allele, frequency),
final_pop) %>% # [[3]] final population (if the generation timer ran out; otherwise NA)
saveRDS(file = output.file)
}
This is a wrapper function that calls run_simulation on all rows in parameters, using a specified number of CPU cores to run them in parallel using mclapply().
do_all_parameters <- function(parameters, over_write, cores){
mating_type_tables <- set_up_mating_type_tables(parameters, cores = cores,
overwrite = FALSE) # Change this manually as needed
parameters <- mating_type_tables[[1]] # annotate the parameters with the mating tables numbers
mating_type_tables <- mating_type_tables[[2]] # Get the mating tables themselves
# Dataframe of parameter spaces that are finished
done <- list.files("data/sim_results", full.names = TRUE)
if(length(done) != 0){
finished <- lapply(done, function(x) readRDS(x)[[1]]) %>%
bind_rows() %>%
select(-generation_extinct, -generation_Zd_extinct,
-generation_W_extinct, -generation_Zd_fixed, -outcome)
# Check all the column names are the same! Should be, if all parameters were made using same code
if(!identical(names(parameters), names(finished))) return("Error! Delete results and start afresh")
}
# If not overwriting, remove rows from `parameters` that are already finished
if(!over_write && length(done) != 0){
finished <- apply(finished, 1, paste0, collapse = "_")
to_do <- as.character(apply(parameters, 1, paste0, collapse = "_"))
to_do <- str_remove_all(to_do, " ")
parameters <- parameters[!(to_do %in% finished), ]
if(nrow(parameters) == 0) return(NULL)
print(paste("Already done", length(finished), "model runs"))
}
suppressWarnings(rm(list = c("to_do", "finished", "done"))) # clear up before running the model
print(paste("Queing up", nrow(parameters), "model runs"))
mclapply(1:nrow(parameters),
function(i){
run_simulation(row = i,
parameters = parameters,
mating_type_table = mating_type_tables)
}, mc.cores = cores)
return("Done all parameters")
}
combine_results_files <- function(cores){
list_of_lists <- mclapply(list.files("data/sim_results", full.names = TRUE),
readRDS, mc.cores = cores)
# Simulations that failed end up with NULL in elements 2 and 3?
completed_successfully <- which(map_lgl(list_of_lists, function(i) !is.null(i[[3]])))
# Get file names (which also act as the seed)
filename <- list.files("data/sim_results")[completed_successfully] %>%
str_replace_all("[.]rds", "") %>% as.numeric()
# Get parameters and overall results as a tibble
results <- data.frame(id = filename,
mclapply(completed_successfully,
function(i) list_of_lists[[i]][[1]],
mc.cores = cores) %>% bind_rows()) %>% as_tibble()
# Get allele frequnecies, and nest them in a list column
allele_freqs <- mclapply(completed_successfully, # allele freqs for every generation
function(i) data.frame(id = filename[i], list_of_lists[[i]][[2]]),
mc.cores = cores) %>% bind_rows() %>% group_by(id) %>% nest()
# Get the final population, and nest it in a list column
final_pop <- mclapply(completed_successfully, # allele freqs for every generation
function(i) data.frame(id = filename[i], list_of_lists[[i]][[3]]),
mc.cores = cores) %>% bind_rows() %>% group_by(id) %>% nest()
output <- left_join(results, allele_freqs, by = "id") %>% left_join(final_pop, by = "id")
output %>% rename(allele_freqs = data.x, final_pop = data.y)
}
add_parameters()
This function takes a dataframe extracted from the big results file (df
), and uses left_join()
to add the parameter spaces used to generate those data. For example, one can grab a subset of the time-to-extinction results using pluck(results, "generation_extinct") %>% filter()
, and then add the parameter values corresponding to each result using this function.
add_parameters <- function(df){
df %>% # Join a results table with all the variable para values
left_join(results$parameters[, names(results$parameter) %in%
names(results$parameters %>%
map(function(x) length(unique(x))))],
by = "id")
}
remove_nonvariable_parameters()
This function removes all columns that do not vary at all (e.g. parameters like the maximum number of generations, which was set to a single value in all simulations).
remove_nonvariable_parameters <- function(df){
df[, apply(df, 2, function(x) length(unique(x))) > 1]
}
# Find parameter spaces that DID vary in the parameter sheet but were in 100% of the crashes
find_failures <- function(){
if(!exists("results")) results <- readRDS("data/all_results.rds")
varies_in_results <- remove_nonvariable_parameters(results[[1]] %>% select(-id)) %>% names()
remove_VARIABLE_parameters <- function(df){
df[, apply(df, 2, function(x) length(unique(x))) == 1]
}
list_of_lists <- mclapply(list.files("data/sim_results", full.names = TRUE),
readRDS, mc.cores = 4)
failed_runs <- which(map_lgl(list_of_lists, function(i) is.null(i[[3]])))
failed_paras <- lapply(list_of_lists[failed_runs], function(x) x[[1]]) %>% bind_rows() %>% remove_VARIABLE_parameters()
failed_paras[, names(failed_paras) %in% varies_in_results]
}
# find_failures() # softness = 0 and cost_Zdrive_female = 0.01?
pluck_allele_freqs <- function(row, results, parameters = NULL){
if(is.null(parameters)) return(allele_freqs)
parameters <- enquo(parameters)
data.frame(results[row,] %>% select(!!parameters),
results$allele_freqs[row], stringsAsFactors = FALSE)
}
sessionInfo()
R version 3.5.1 (2018-07-02)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.4
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] parallel stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] Rcpp_0.12.17 reshape2_1.4.3 stringr_1.3.1 tidyr_0.8.1
[5] purrr_0.2.5 dplyr_0.7.6
loaded via a namespace (and not attached):
[1] knitr_1.20 bindr_0.1.1 whisker_0.3-2
[4] magrittr_1.5 workflowr_1.1.1 tidyselect_0.2.4
[7] R6_2.2.2 rlang_0.2.1 plyr_1.8.4
[10] tools_3.5.1 R.oo_1.22.0 git2r_0.22.1
[13] htmltools_0.3.6 yaml_2.1.19 rprojroot_1.3-2
[16] digest_0.6.15 assertthat_0.2.0 tibble_1.4.2
[19] crayon_1.3.4 bindrcpp_0.2.2 R.utils_2.6.0
[22] glue_1.2.0 evaluate_0.10.1 rmarkdown_1.10
[25] stringi_1.2.3 pillar_1.3.0 compiler_3.5.1
[28] backports_1.1.2 R.methodsS3_1.7.1 pkgconfig_2.0.1
This reproducible R Markdown analysis was created with workflowr 1.1.1