To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

...
 
Commits (4)
import os
import matplotlib
matplotlib.use('TkAgg')
from appJar import gui
import yaml
import os
import scenarios
from collections import OrderedDict
import fasta_tools
......@@ -13,13 +13,18 @@ def select():
selection = app.getAllOptionBoxes()
#display default settings organism
with open('../seq_sim/simulation_settings/'+selection['Organism']) as f:
dir_path = os.path.dirname(os.path.realpath(__file__))
dir_path_up = os.sep.join(dir_path.split(os.sep)[:-1])
sim_settings_path = dir_path_up+os.sep+'SeqSimEvo'+os.sep+'simulation_settings/'
scenario_settings_path = dir_path_up+os.sep+'Scenarios'+os.sep+'settings_files/'
with open(sim_settings_path+selection['Organism']) as f:
OrgSettings = f.read()
app.clearTextArea('OrgSet')
app.setTextArea('OrgSet', OrgSettings)
#display default settings Scenario
with open('settings_files/'+selection['Scenario']) as f:
with open(scenario_settings_path+selection['Scenario']) as f:
ScenSettings = f.read()
app.clearTextArea('ScenSet')
app.setTextArea('ScenSet', ScenSettings)
......@@ -66,10 +71,15 @@ def settings_press(btn):
os.chdir(os.path.dirname(os.path.abspath(__file__)))
with gui('SeqSim') as app:
Organism_options = os.listdir('../seq_sim/simulation_settings/')
dir_path = os.path.dirname(os.path.realpath(__file__))
dir_path_up = os.sep.join(dir_path.split(os.sep)[:-1])
sim_settings_path = dir_path_up+os.sep+'SeqSimEvo'+os.sep+'simulation_settings/'
Organism_options = os.listdir(sim_settings_path)
app.addLabelOptionBox('Organism',Organism_options)
Scenario_options = os.listdir('settings_files/')
scenario_settings_path = dir_path_up+os.sep+'Scenarios'+os.sep+'settings_files/'
Scenario_options = os.listdir(scenario_settings_path)
app.addLabelOptionBox('Scenario',Scenario_options)
app.addButton('select',select)
......
import os
import sys
dir_path = os.path.dirname(os.path.realpath(__file__))
dir_path_up = os.sep.join(dir_path.split(os.sep)[:-1])
sys.path.append(dir_path_up+os.sep+'Scenarios')
from virus_passaging import passaging
import matplotlib.pyplot as plt
......@@ -73,10 +79,3 @@ def run(scenario,scenario_settings,organism_settings):
fasta = multiple_compartments.run(scenario_settings,organism_settings)
return fasta
if __name__ == '__main__':
#events = [(10,'t',0.1),(20,'v',1e5)] #(time,eventtype, new value), events: t: tranfer prop change, v: total volume change
#skyline(events, plot=True, plot_freq=5,progress=True)
control(plot=True,plot_freq=10)
#migration()
#root()
import seq_sim
import SeqSimEvo as seq_sim
import numpy as np
from collections import Counter
from copy import deepcopy
from tqdm import tqdm
class Simulation(seq_sim.Simulation):
def __init__(self,interactionTable=None,interactionFraction=0.3,**kwargs):
def __init__(self,interactionTable=None,interactionFraction=0.3, ratio=0.5,**kwargs):
seq_sim.Simulation.__init__(self,**kwargs)
if interactionTable is None:
self.interactionTable = self.generateInteractions(fraction=interactionFraction)
self.interactionTable = self.generateInteractions(fraction=interactionFraction, ratio=ratio)
else:
self.interactionTable = interactionTable
......@@ -44,9 +44,17 @@ class Simulation(seq_sim.Simulation):
if combi[0][0]>combi[1][0]:
combi = [combi[1],combi[0]]
if np.random.random() < ratio:
combi.append(1)
#combi.append(1)
fit1 = self.get_fitness_effect(combi[0][0],combi[0][1])
fit2 = self.get_fitness_effect(combi[1][0],combi[1][1])
cur = fit1*fit2
new = 1+np.random.exponential(self.settings['parameters']['lb'])
if new>cur:
combi.append(new)
else:
combi.append(cur)
else:
combi.append(-1)
combi.append(0)
try:
combis[tuple(combi[0])].append(combi[1:])
except KeyError:
......@@ -60,23 +68,26 @@ class Simulation(seq_sim.Simulation):
changes = self.current_gen.get_seq(sequence_id)
fitness = 1
if changes is not None:
for pos, base in zip(changes[:, 0], changes[:, 1]):
fitness *= (self.fitness_table[int(base), int(pos)])
if len(changes)>1:
sort = np.argsort(changes[:,0])
for i,change1 in enumerate(changes[sort]):
if tuple(change1) in self.interactionTable:
ch1 = [tuple(c[0]) for c in self.interactionTable[tuple(change1)]]
chs2 = set([tuple(c) for c in changes[sort][i+1:]])
interactions = set(ch1).intersection(chs2)
for interaction in interactions:
value = [self.interactionTable[tuple(change1)][i][-1] for i in range(len(ch1)) if ch1[i]==interaction]
if value[-1] == -1:
fitness = fitness/2 #TODO: hard coded effect, code differently!
elif value[-1] == 1:
fitness = fitness*2
else:
raise ValueError("value should be -1 or 1, but is {}".format(value[-1]))
#for pos, base in zip(changes[:, 0], changes[:, 1]):
# fitness *= (self.fitness_table[int(base), int(pos)])
sort = np.argsort(changes[:,0])
already_done = []
for i,change1 in enumerate(changes[sort]):
if tuple(change1) in self.interactionTable:
ch1 = [tuple(c[0]) for c in self.interactionTable[tuple(change1)]]
chs2 = set([tuple(c) for c in changes[sort][i+1:]])
interactions = set(ch1).intersection(chs2)
for interaction in interactions:
already_done += [tuple(change1),interaction]
value = [self.interactionTable[tuple(change1)][i][-1] for i in range(len(ch1)) if ch1[i]==interaction]
fitness*=np.product(value)
done=False
if len(already_done)>0:
if tuple(change1) in already_done:
done=True
if not done:
fitness*=(self.fitness_table[int(change1[1]), int(change1[0])])
if return_fitness:
return np.random.poisson(fitness*R0), fitness
return np.random.poisson(fitness*R0)
......@@ -97,7 +108,7 @@ class Seq(seq_sim.Seq):
def analyze_passage(sim,f,passage):
try:
changes = np.vstack(sim.current_gen.changes.values())
changes = np.vstack(list(sim.current_gen.changes.values()))
except ValueError:
return 0
n_seq = float(sim.n_seq)
......@@ -120,7 +131,7 @@ def run_custom():
fraction = float(sys.argv[8])
env1_1 = Simulation(name='env1_1',interactionFraction=interactionFraction,
ratio=ratio,seq_len=9000)
ratio=ratio,seq_len=500)
sims = [env1_1]+[env1_1.copy('env1_'+str(i)) for i in range(2,6)]
env2_1 = env1_1.copy('env2_1')
env2_1.transform_fitnessTable(transform=[transformType,transformValue],fraction=fraction)
......@@ -156,27 +167,27 @@ if __name__ == '__main__':
resultfile = sys.argv[2]
timingsfile = sys.argv[3]
env1_1 = Simulation(name='env1_1',interactionFraction=1,
ratio=0,seq_len=9000)
env1_1 = Simulation(name='env1_1',interactionFraction=0.05,
ratio=0,seq_len=500)
sims = [env1_1]+[env1_1.copy('env1_'+str(i)) for i in range(2,6)]
env2_1 = env1_1.copy('env2_1')
#nv2_1.generateInteractions(float(sys.argv[4]),1)
#env2_1.transform_fitnessTable(transform=['scramble',0],fraction=0.3)
env2_1.transform_fitnessTable(transform=['product',1.1],fraction=1)
env2_1.transform_fitnessTable(transform=['set',0],fraction=0.3)
env2_1.transform_fitnessTable(transform=['set',1],fraction=0.3)
#env2_1.transform_fitnessTable(transform=['product',1.1],fraction=1)
#env2_1.transform_fitnessTable(transform=['set',0],fraction=0.3)
#env2_1.transform_fitnessTable(transform=['set',1],fraction=0.3)
sims += [env2_1]+[env2_1.copy('env2_'+str(i)) for i in range(2,6)]
with open(interactionfile,'w') as f:
with open(interactionfile,'wb') as f:
pickle.dump(env1_1.interactionTable,f)
f = open(resultfile,'w',buffering=1)
t = open(timingsfile,'w',buffering=1)
for i in range(201):
for i in range(10):
for sim in sims:
if i%2 == 0:
if 'env2' in sim.settings['name']:
......
from __future__ import print_function
import SeqSimEvo as seq_sim
import numpy as np
from collections import Counter
from copy import deepcopy
from tqdm import tqdm
import sys
class Simulation(seq_sim.Simulation):
def __init__(self,epistasis={}, ratio=0.5,fitness_table=None,**kwargs):
seq_sim.Simulation.__init__(self,**kwargs)
if fitness_table is None:
self.fitness_table = self.__getfitness_table()
else:
self.fitness_table = fitness_table
self.settings['epistasis'] = epistasis
def __getfitness_table(self):
return {}
def assign_value(self,changes):
if len(changes) == 1: #single mutation: draw from MFED
if self.settings['model'] == 'exponential':
pars = self.settings['parameters']
random_nr = np.random.random()
if random_nr < pars['fl']: #lethal mutation
return 0
elif random_nr < (pars['fl']+pars['fd']):
fit = 1-np.random.exponential(pars['ld'])
if fit<0:
fit=0
return fit
elif random_nr > (pars['fl']+pars['fd']+pars['fn']):
return 1+np.random.exponential(pars['lb'])
if self.settings['model'] == 'neutral':
return 1
if self.settings['model'] == 'lognormal':
random_nr = np.random.random()
pars = self.settings['parameters']
if random_nr < pars['fl']: #lethal mutation
return 0
else:
return np.random.lognormal(mean=pars['mu'],sigma=pars['sigma'])
else:
raise('model {} not implemented'.format(self.settings['model']))
else:
epi = self.settings['epistasis']
random_nr = np.random.random()
if epi['model'] == 'exponential':
random_nr = np.random.random()
if random_nr < epi['par']['fl']:
e = 0
elif random_nr < epi['par']['fl']+epi['par']['fd']:
e = np.exp(-np.random.exponential(epi['par']['ld']))
else:
e = np.exp(np.random.exponential(epi['par']['lb']))
if epi['model'] == 'none':
e = 1
else:
print('model {} not implemented'.format(epi['model']))
raise()
if len(changes) == 2:
str1 = '{}-{}'.format(changes[0][0],changes[0][1])
str2 = '{}-{}'.format(changes[1][0],changes[1][1])
try:
f1 = self.fitness_table[str1]
except KeyError:
self.fitness_table[str1] = self.assign_value([changes[0]])
f1 = self.fitness_table[str1]
try:
f2 = self.fitness_table[str2]
except KeyError:
self.fitness_table[str2] = self.assign_value([changes[1]])
f2 = self.fitness_table[str2]
else:
found=False
for c in range(len(changes)):
subset = np.vstack([changes[:c],changes[c+1:]])
changes_str = ['{}-{}'.format(i[0],i[1]) for i in subset]
changes_str = ' '.join(sorted(changes_str))
if changes_str in self.fitness_table.keys():
f1 = self.fitness_table[changes_str]
found=True
break
if not found:
self.fitness_table[changes_str] = self.assign_value(subset)
f1 = self.fitness_table[changes_str]
try:
f2 = self.fitness_table['{}-{}'.format(changes[c][0],changes[c][1])]
except KeyError:
self.fitness_table['{}-{}'.format(changes[c][0],changes[c][1])] = self.assign_value([changes[c]])
f2 = self.fitness_table['{}-{}'.format(changes[c][0],changes[c][1])]
return e*f1*f2
def get_nr_offspring(self, sequence_id, return_fitness=False):
"""returns the number of offspring of a sequence according to the fitness
of that sequence"""
R0 = self.settings['R0']
changes = self.current_gen.get_seq(sequence_id)
if changes is None:
fitness = 1
else:
changes_str = ['{}-{}'.format(i[0],i[1]) for i in changes]
changes_str = ' '.join(sorted(changes_str))
try:
fitness = self.fitness_table[changes_str]
except KeyError:
self.fitness_table[changes_str] = self.assign_value(changes)
fitness = self.fitness_table[changes_str]
if return_fitness:
return np.random.poisson(R0*fitness), fitness
else:
return np.random.poisson(R0*fitness)
def copy(self,name):
return Simulation(simulation_settings=deepcopy(self.settings),
epistasis=self.settings['epistasis'],
sequence = self.sequence,
fitness_table=deepcopy(self.fitness_table),
name= name)
class Population(seq_sim.Population):
def __init__(self,**kwargs):
seq_sim.Population.__init__(self,**kwargs)
class Seq(seq_sim.Seq):
def __init__(self,**kwargs):
seq_sim.Seq.__init__(self,**kwargs)
if __name__ == '__main__':
n_gen = 200
pars = {'fl': 0.1,'mu': -0.25,'sigma': 0.15}
epistasis = {'same':1, 'other':0,
'par':{'fb':0,'lb':0,'fl':0,'fd':0,'ld':0,'fn':1},
'model':'none'}
sim = Simulation(model='lognormal',
parameters = pars,
epistasis=epistasis,
n_seq_init=1000,
max_pop=10000,
mut_rate=1e-5,
seq_len=1000)
for gen in range(n_gen):
sim.new_generation()
if gen%10 == 0:
print('mean: {}'.format(sim.average_fitness))
from __future__ import print_function
import SeqSimEvo as sim
import numpy as np
master_sim = sim.Simulation(simulation_settings='HIV')
n_gen = 10
#settings of the different compartments
compartment_names = ['sanctuary', 'liver','blood']
compartment_sizes = [1e3, 1e5, 1e2]
initial_sizes = [0, 0, 10]
migration_rates = np.array([[0, 0, 1e-3], #row=from, col = to
[0, 0, 1e-2],
[1e-3, 1e-2, 0]])
replication = [0.2, 1, 0.166]
mut_rate = [0.2, 1, 0]
fasta = ''
#setup different compartments:
compartments = []
for i in range(len(compartment_names)):
compartments.append(master_sim.copy(compartment_names[i], n_seq=initial_sizes[i]))
compartments[-1].settings['max_pop'] = compartment_sizes[i]
compartments[-1].settings['mut_rate'] = master_sim.settings['mut_rate']*mut_rate[i]
compartments[-1].settings['R0'] = master_sim.settings['R0']*replication[i]
#do simulations
for gen in range(n_gen):
#new generation
for comp in compartments:
comp.new_generation(dieout=True)
#do migration
additions = [[] for i in compartment_names]
for comp1 in range(len(compartments)):
for comp2 in range(len(compartments)):
migration = migration_rates[comp1, comp2]
if migration>0:
n_migrate = np.random.poisson(migration*len(compartments[comp1].current_gen))
for i in range(n_migrate):
migrant = np.random.randint(len(compartments[comp1].current_gen))
additions[comp2].append(compartments[comp1].current_gen.get_seq(migrant))
compartments[comp1].current_gen.delete_sequence(migrant)
for i, add in enumerate(additions):
for seq in add:
compartments[i].current_gen.add_sequence(changes = seq)
#liver transplant
# if gen == 50:
# compartments[1].settings['max_pop'] = 0
#
# if gen == 51:
# compartments[1].settings['max_pop'] = compartment_sizes[1]
#sampling
if gen in [4,20,50,99]:#55,70,99]:
fasta += compartments[0].current_gen.to_fasta(n_seq=10,description='-1-{}'.format(gen))
fasta += compartments[1].current_gen.to_fasta(n_seq=10,description='-2-{}'.format(gen))
#fasta += compartments[2].current_gen.to_fasta(n_seq=10,description='-3-{}'.format(gen))
#print some stats
print('gen {}: nseq'.format(gen),end=' ')
for comp in compartments:
print(comp.current_gen.stats()['n_seq'],end=' ')
print('')
with open('3comp.fasta', 'w') as f:
f.write(fasta)
import seq_sim
from __future__ import print_function
import SeqSimEvo as seq_sim
import numpy as np
......@@ -50,7 +51,7 @@ class multiple_compartments(object):
name = names[i]
else:
name = str(i)
print kwargs
print(kwargs)
new_kwargs = {j: kwargs[j][i] for j in kwargs}
self.sims.append(self.base_sim.copy(name,n_seq=n_seq,**new_kwargs))
......@@ -103,7 +104,7 @@ class multiple_compartments(object):
##TODO: implement temporal sampling
def run(scenario_settings,organism_settings):
kw_settings = {}
print scenario_settings
print(scenario_settings)
for i in scenario_settings:
if i in ['n_comparments','diverse_index','names','n_seq_init','migration',
'mut_rate','R0','max_pop']:
......@@ -126,7 +127,7 @@ def run(scenario_settings,organism_settings):
fasta = ''
for i in range(scenario_settings['n_gen']):
print i
print(i)
sim.new_generation()
if i+1 in scenario_settings['sampling_times']:
for j,s in enumerate(sim.sims):
......@@ -183,7 +184,7 @@ if __name__ == '__main__':
else:
settings['migration'] = ast.literal_eval(args.mig)
print args.sa
print(args.sa)
settings['sampling_amount'] = args.sa
if args.st is None:
......@@ -206,4 +207,4 @@ if __name__ == '__main__':
if args.maxpop is not None:
settings['max_pop'] = args.maxpop
print run(settings, args.o)
print(run(settings, args.o))
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 19 14:29:00 2017
@author: eva
"""
import seq_sim as sim
from __future__ import print_function
import SeqSimEvo as sim
from collections import Counter
import numpy as np
import pickle
......@@ -85,7 +83,7 @@ def recreate_dataset(sample_sizes,nr_mutations,apobec,model=None,parameters=None
if n_changed > nr_mutations[e]:
if abs(nr_mutations[e]-n_changed)<abs(nr_mutations[e]-previous):
if action == 'print':
print '#{} generations'.format(i)
print('#{} generations'.format(i))
this_patient.current_gen.print_sample(sample)
elif action == 'shared_stats':
stats_here.add(this_patient.current_gen,sample,e)
......@@ -99,7 +97,7 @@ def recreate_dataset(sample_sizes,nr_mutations,apobec,model=None,parameters=None
break
else:
if action == 'print':
print '#{} generations'.format(i-1)
print('#{} generations'.format(i-1))
previous_gen.print_sample(old_sample)
elif action == 'shared_stats':
stats_here.add(previous_gen,old_sample,e)
......@@ -187,7 +185,7 @@ if __name__ == '__main__':
simulation_settings=settings)
if action == 'shared_stats': #todo: prettier print
for i in simulation:
print i
print(i)
if (action == 'n_generations') or (action == 'fasta'):
print simulation
print(simulation)
import seq_sim
from __future__ import print_function
import SeqSimEvo as seq_sim
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
......@@ -30,10 +31,10 @@ class Simulation(seq_sim.Simulation):
tag = self.current_gen.tags[seq_id_old]
#add new functionality: mutate tag
try:
nr_mut = self.mutations_per_tag.next()
nr_mut = next(self.mutations_per_tag)
except StopIteration:
self.mutations_per_tag = self.new_mutations_per_tag()
nr_mut = self.mutations_per_tag.next()
nr_mut = next(self.mutations_per_tag)
if nr_mut>0:
tag_seq = self.sequence.get_tag(tag)
success_mut = 0
......@@ -97,7 +98,7 @@ class Population(seq_sim.Population):
seq=i,
patient=self.sim.settings['name'],
tag = self.tags[i])
print string
print(string)
......@@ -136,10 +137,10 @@ class Seq(seq_sim.Seq):
return number
def output_tags(sim,transfer):
print '#transfer', transfer
print('#transfer', transfer)
counts = Counter(sim.current_gen.tags)
for i in counts:
print i, counts[i]
print(i, counts[i])
def output_fitness_per_tag(sim,transfer):
#print '#',transfer
......@@ -158,11 +159,11 @@ def output_fitness_per_tag(sim,transfer):
# changes.append(0)
for i in tags:
print transfer, i, np.mean(tags[i]), np.max(tags[i]),np.min(tags[i]),len(tags[i])
print(transfer, i, np.mean(tags[i]), np.max(tags[i]),np.min(tags[i]),len(tags[i]))
def output_seqs(sim,transfer):
if transfer in [99,199]:
print '#',transfer
print('#',transfer)
sim.current_gen.print_sample(sim.current_gen.get_sample(sim.current_gen.n_seq))
......@@ -227,7 +228,7 @@ def output_all(sim, transfer):
'experimentID':experimentID,
'changes': changes_string
}
print '{count},{tag},{fitness},{transfer},{experimentID},{changes}'.format(**data)
print('{count},{tag},{fitness},{transfer},{experimentID},{changes}'.format(**data))
count=1
data = {'count': count,
'tag': sim.current_gen.tags[seqID],
......@@ -236,7 +237,7 @@ def output_all(sim, transfer):
'experimentID':experimentID,
'changes': changes_string
}
print '{count},{tag},{fitness},{transfer},{experimentID},{changes}'.format(**data)
print('{count},{tag},{fitness},{transfer},{experimentID},{changes}'.format(**data))
if __name__ == '__main__':
......@@ -244,10 +245,11 @@ if __name__ == '__main__':
import tqdm
if sys.argv[1] in ['help','-h','h']:
print 'usage: python sequence_tags.py MFED nBarcodes distBarcodes popSize diverseStart transferProp'
print('usage: python sequence_tags.py MFED nBarcodes distBarcodes popSize diverseStart transferProp')
exit()
elif sys.argv[1] == 'neutral':
scenario = 'neutral'
param=[]
elif sys.argv[1] == 'selection':
scenario = 'exponential'
elif sys.argv[1] == 'more_lethal':
......@@ -263,7 +265,7 @@ if __name__ == '__main__':
scenario = 'exponential'
param = {'fl': 0.1,'fb': 0.0,'fn':0.6,'lb': 0,'fd': 0.3, 'ld': 0.21}
else:
print 'unknown MFED'
print('unknown MFED')
exit()
n_tags = int(sys.argv[2])
......@@ -280,12 +282,12 @@ if __name__ == '__main__':
div = float(sys.argv[5]) #before setting this != 0: make sure diversified sequences don't have too large fitness effects!!!!!!!
if div != 0:
print 'diversity diffferent from 0 not implemented yet!!'
print('diversity diffferent from 0 not implemented yet!!')
exit()
transfer_prop = float(sys.argv[6])
n_transfer = 200
n_transfer = 10
experimentID = '_'.join(sys.argv[1:])
#run_sim(tag_dist, pop_size,transfer_prop,n_transfer,div,scenario,output_tags)
......
pop_size: [5000,5000] #population size per population
max_pop: [5000,5000] #maximum population size (global or per population)
transfer_prop: 0.5 #transfer popultation (global or per population)
migration: [[0 ,0.0005], #migration rate (None or migration matrix)
[0.0005, 0]]
sampling: [0,0] #sampling rate (per population)
n_transfer: 1000 #number of transfers
n_gen_per_transfer: 2 #number of generations per transfer
events: [[1000], #events (changes to parameters) during simulation
[sampling], #format: [[passage nrs], [parameters to change], [new values]]
[[20,20]]]
pop_size: [100,10] #population size per population
max_pop: [1000,100] #maximum population size (global or per population)
transfer_prop: 0.5 #transfer popultation (global or per population)
migration: [[0 ,0.01], #migration rate (None or migration matrix)
[0.01, 0]]
sampling: [0,0] #sampling rate (per population)
n_transfer: 100 #number of transfers
n_gen_per_transfer: 2 #number of generations per transfer
events: [[100], #events (changes to parameters) during simulation
[sampling], #format: [[passage nrs], [parameters to change], [new values]]
[[100,100]]]
pop_size: [100,100,100,100] #population size per population
max_pop: 400000 #maximum population size (global or per population)
transfer_amount: 100 #transfer popultation (global or per population)
migration: #migration rate (None or migration matrix)
sampling: [0, 0,0,0] #number of samples (per population)
n_transfer: 300 #number of transfers
n_gen_per_transfer: 2 #number of generations per transfer
events: [[300],
['sampling'],
[[1000,1000,1000,1000]]]
pop_size: [10000] #population size per population
max_pop: [10000] #maximum population size (global or per population)
pop_size: [100000,100000,100000] #population size per population
max_pop: [100000,100000,100000] #maximum population size (global or per population)
transfer_prop: 0.01 #transfer popultation (global or per population)
migration: #migration rate (None or migration matrix)
sampling: [0, 0] #number of samples (per population)
migration: [[0,0,0],[0,0,0],[0,0,0]] #migration rate (None or migration matrix)
sampling: [0, 0, 0] #number of samples (per population)
n_transfer: 200 #number of transfers
n_gen_per_transfer: 2 #number of generations per transfer
events: [[100, 101, 199, 200],
['sampling', 'sampling','sampling','sampling'],
[[100], [0], [100], [0]]]
[[1000,1000,1000], [0,0,0], [1000,1000,1000], [0,0,0]]]
import seq_sim
from __future__ import print_function
import SeqSimEvo as seq_sim
import argparse
def run(scenario_settings, sim_settings):
......@@ -54,4 +55,4 @@ if __name__ == '__main__':
#run scenario
print run(settings, args.o)
print(run(settings, args.o))
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 11 15:13:13 2017
......@@ -7,23 +5,30 @@ Created on Tue Jul 11 15:13:13 2017
virus passaging as the experiment in the liquid handling robot
"""
#import optparse
import seq_sim as sim
from __future__ import print_function
import os
import sys
from copy import deepcopy as copy
dir_path = os.path.dirname(os.path.realpath(__file__))
dir_path_up = os.sep.join(dir_path.split(os.sep)[:-1])
sys.path.append(dir_path_up+os.sep+'SeqSimEvo')
import SeqSimEvo as sim
from tqdm import tqdm
import yaml
class passaging():
def __init__(self, sim_settings, passaging_settings):
self.settings = passaging_settings
main_sim = sim.Simulation(sim_settings, n_seq_init = max(self.settings['pop_size']))
self.settings = copy(passaging_settings)
main_sim = sim.Simulation(copy(sim_settings), n_seq_init = max(self.settings['pop_size']))
self.sims = []
self.n_pop = len(self.settings['pop_size'])
names = iter(range(self.n_pop))
for i in range(self.n_pop):
self.sims.append(main_sim.copy(names.next(),n_seq=self.settings['pop_size'][i]))
self.sims.append(main_sim.copy(next(names),n_seq=self.settings['pop_size'][i]))
try:
self.sims[i].settings['max_pop'] = self.settings['max_pop'][i]
except TypeError:
......@@ -33,7 +38,6 @@ class passaging():
def next_passage(self):
self.cur_passage+=1
#print self.cur_passage
#handle events
while self.cur_passage in self.settings['events'][0]:
loc = self.settings['events'][0].index(self.cur_passage)
......@@ -60,7 +64,6 @@ class passaging():
description=' - pop {} - transfer {} '.format(i,self.cur_passage))
#change parameters for transfer
#print pop.current_gen.n_seq
if 'transfer_amount' in self.settings:
pop.settings['max_pop'] = self.settings['transfer_amount']
else:
......@@ -91,7 +94,10 @@ class passaging():
def run(scenario_settings,organism_settings):
passaging_run = passaging(organism_settings,scenario_settings)
passaging_run.all_passages()
return passaging_run.output
fasta = passaging_run.output
if len(fasta) < 6000:
print(passaging_run.sims)
return fasta
......@@ -133,7 +139,6 @@ if __name__ == '__main__':
[passage number] [parameters to change] [new values] : ..."')
args = parser.parse_args()
settings = {}
settings['n_transfer'] = args.n
......@@ -168,4 +173,4 @@ if __name__ == '__main__':
#run scenario
print run(settings, args.o)
print(run(settings, args.o))
from SeqSimEvo.simulation import *
......@@ -103,20 +103,20 @@ class Population():
stats['unmutated'] = self.changes[0,3]
stats['total_mutations'] = 0
for i,node in enumerate(self.changes[1:]):
if node[3] > 0:
n_muts = 1
parent = int(node[2])
while parent!=0:
parent = int(self.changes[parent,2])
mut_counts[parent-1]+=1
n_muts+=1
stats['total_mutations'] += n_muts*node[3]
# stats['total_mutations'] = 0
# for i,node in enumerate(self.changes[1:]):
# if node[3] > 0:
# n_muts = 1
# parent = int(node[2])
# while parent!=0:
# parent = int(self.changes[parent,2])
# mut_counts[parent-1]+=1
# n_muts+=1
# stats['total_mutations'] += n_muts*node[3]
stats['unique_mutations'] = len(set(self.changes[1:,1]))
print mut_counts
# print mut_counts
return stats
......@@ -196,10 +196,11 @@ class Population():
# else:
# return np.nan
sim = Simulation(sequence=Seq(seq='ATA'))
test = Population(sim,10)
test.add_sequence()
test.add_sequence([0.1,2.1])
test.add_sequence([2.1])
test.add_sequence([0.3,2.3])
print test.stats()
if __name__ == '__main__':
sim = Simulation(sequence=Seq(seq='ATA'))
test = Population(sim,10)
test.add_sequence()
test.add_sequence([0.1,2.1])
test.add_sequence([2.1])
test.add_sequence([0.3,2.3])
print test.stats()
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Dec 15 13:42:33 2016
@author: eva
"""
from __future__ import print_function
import sys
import numpy as np
np.set_printoptions(threshold=np.nan, suppress=True)
np.set_printoptions(threshold=sys.maxsize, suppress=True)
from collections import Counter
import random
from copy import copy, deepcopy
......@@ -14,9 +14,7 @@ import time
import yaml
import os
import inspect
import progressbar
import scipy.stats as scats
import sys
class Seq(object):
'''
......@@ -37,8 +35,6 @@ class Seq(object):
self.sequence = self.translate_sequence(seq)
self.len = len(seq)
def __str__(self):
seq = ''
for i in self.sequence:
......@@ -61,6 +57,8 @@ class Seq(object):
def generate_seq(self, seq_len, base_dist=None):
'''generate a sequence of seq_len bases according to the base
distribution (order: A-G-T-C)'''
if seq_len < 0:
raise TypeError('seq_len must be positive integer')
seq = []
if base_dist is None:
base_dist = np.array([0.25, 0.5, 0.75, 1])
......@@ -105,7 +103,7 @@ class Simulation(object):
except TypeError:
self.settings = simulation_settings
for key, value in kwargs.iteritems():
for key, value in kwargs.items():
self.settings[key] = value
......@@ -259,10 +257,10 @@ class Simulation(object):
"""mutates a sequence (with existing mutations) of length N, according to
the per base mutation rate"""
try:
nr_mut = self.mutations_per_seq.next()
nr_mut = next(self.mutations_per_seq)
except StopIteration:
self.mutations_per_seq = self.new_mutations_per_seq()
nr_mut = self.mutations_per_seq.next()
nr_mut = next(self.mutations_per_seq)
if nr_mut>0:
success_mut = 0
......@@ -295,6 +293,10 @@ class Simulation(object):
fitnesses = [0]*self.current_gen.n_seq
weights = [0]*self.current_gen.n_seq
#generate offspring list
try:
xrange()
except NameError:
xrange = range
for i in xrange(self.current_gen.n_seq):
#find changes in current sequence
#find the number of offspring based on the mutations that already took place
......@@ -303,7 +305,6 @@ class Simulation(object):
fitnesses[i] = fitness
# temp = [i]*(n_offspring)
#all_offspring += temp #make a list off the offspring per sequence
#get average fitness of this generation
self.average_fitness = np.mean(fitnesses)
......@@ -314,9 +315,13 @@ class Simulation(object):
if sum(weights) > self.settings['max_pop']:
#reduce the population randomly to max_pop
all_offspring = sorted(np.random.choice(range(self.current_gen.n_seq),
size=int(self.settings['max_pop']),
p=np.array(weights,dtype=float)/sum(weights)))
try:
all_offspring = sorted(np.random.choice(range(self.current_gen.n_seq),
size=int(self.settings['max_pop']),
p=np.array(weights,dtype=float)/sum(weights)))
except TypeError:
print(weights)
np.array(weights,dtype=float)/sum(weights)
else:
all_offspring = [i for i,j in enumerate(weights) for k in xrange(j)]
#actually create the next generation
......@@ -332,7 +337,7 @@ class Simulation(object):
if new_gen.n_seq == 0 and not dieout :
print 'died out'
print('died out')
n_gen = self.gen
self = self.copy(self.settings['name'])
for i in range(n_gen):
......@@ -409,7 +414,7 @@ class Population():
to=j[1],
seq=i,
patient=self.sim.settings['name'])
print string
print(string)
def sample_to_string(self, seq_ids):
''' return a summary of the mutation that have occured in all seq_ids in
......@@ -500,7 +505,10 @@ class Population():
all_mutations = [tuple(row) for row in all_mutations]
stats['unique_mutations'] = len(set(all_mutations))
mut_counts = np.array(Counter(all_mutations).values())
if len(all_mutations) > 0:
mut_counts = np.array(list(Counter(all_mutations).values()))
else:
mut_counts = []
if len(mut_counts) > 0:
stats['majority_mutations'] = sum(mut_counts > (stats['n_seq']/2.0))
stats['max_fraction'] = max(mut_counts/float(stats['n_seq']))
......@@ -600,12 +608,18 @@ class Population():
return np.nan
if __name__ == '__main__':
settings = '/home/eva/code/SeqSim/seq_sim/simulation_settings/phix174' #sys.argv[1]
n_gen = 20#int(sys.argv[2])
#settings = '/home/eva/code/SeqSim/seq_sim/simulation_settings/phix174' #sys.argv[1]
n_gen = 200
sim = Simulation(settings)
pars = {'fl': 0.1,'mu': -0.25,'sigma': 0.15}
sim = Simulation(model='lognormal',
parameters = pars,
n_seq_init=1000,
max_pop=10000,
mut_rate=1e-5,
seq_len=1000)
for i in range(n_gen):
sim.new_generation()
print sim.current_gen.Hamming_distance(sim.settings,sim.current_gen.get_sample(10),action='Poisson_fit')
if i%10 == 0:
print('mean: {}'.format(sim.average_fitness))
#settings for the MFED
model: 'lognormal'
parameters:
fl: 0.45
mu: -0.248
sigma: 0.149
#settings for new mutations
mut_rate: 1.2e-4
subs_matrix: [[0.54505451, 0.79257926, 0.90629063, 1.],
[0.38363836, 0.81938194, 0.90849085, 1.],
[0.21922192, 0.33003300, 0.75747575, 1.],
[0.18228177, 0.29717028, 0.54194581, 1.]]
#sequence settings
seq_len: 669
basedist: [ 0.25, 0.5, 0.75 , 1] #A,G,T,C
#simulation settings
n_seq_init: 1
R0: 6
ga_increase: 1
max_pop: 10000
name: 'HCV'
#settings for the MFED
model: 'exponential'
parameters:
fl: 0.182
fb: 0.053
lb: 0.202
#settings for new mutations
mut_rate: 2.16e-5
subs_matrix: [[0.54505451, 0.79257926, 0.90629063, 1.],
[0.38363836, 0.81938194, 0.90849085, 1.],
[0.21922192, 0.33003300, 0.75747575, 1.],
[0.18228177, 0.29717028, 0.54194581, 1.]]
#sequence settings
seq_len: 2600
basedist: [ 0.37, 0.61, 0.8 , 1] #A,G,T,C
#simulation settings
n_seq_init: 1
R0: 6
ga_increase: 1
max_pop: 10000
name: 'HIV'
#settings for the MFED
model: 'neutral'
#settings for new mutations
mut_rate: 2.16e-5
subs_matrix: [[0.54505451, 0.79257926, 0.90629063, 1.],
[0.38363836, 0.81938194, 0.90849085, 1.],
[0.21922192, 0.33003300, 0.75747575, 1.],
[0.18228177, 0.29717028, 0.54194581, 1.]]
#sequence settings
seq_len: 2600
basedist: [ 0.37, 0.61, 0.8 , 1] #A,G,T,C
#simulation settings
n_seq_init: 1
R0: 6
ga_increase: 1
max_pop: 10000
name: 'HIV'
......@@ -39,7 +39,7 @@ subs_matrix: [[0.25, 0.50, 0.75, 1.], #A
#sequence settings
seq_len: 5386
basedist: [0.25,0.5,0.75,1] #A,G,T,C
basedist: [0.24,0.47,0.79,1] #A,G,T,C
#simulation settings
n_seq_init: 1000000
......
#settings for the MFED
model: exponential #spikes #from_data
parameters:
fl: 0.0 #exponential, lognormal, spikes
fb: 0.29 #exponential (scalar)
lb: 0.3 #exponential
fd: 0.51 #exponential
ld: 0.21 #exponential
# fn: #exponential
# mu: #lognormal
sigma: #lognormal
loc: [ 0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ,1.1]
freq: [0.2, 0.021538461538461541, 0.0, 0.021538461538461541, 0.0,
0.021538461538461541,0.021538461538461541,0.067692307692307704,
0.067692307692307704, 0.20307692307692307, 0.29230769230769232,
0.089230769230769238]
values: [0.999, 0.0, 0.849, 0.652, 0.895, 0.937, 0.8089999999999999,
0.6859999999999999, 0.878, 0.694, 0.0, 0.948, 1.035, 0.959, 1.068,
1.015, 1.023, 0.0, 0.0, 0.0, 0.0, 0.858, 0.0, 0.862, 1.022,
0.5700000000000001, 0.795, 1.061, 1.059, 1.001, 1.02, 0.0, 1.019,
1.006, 0.984, 0.482, 0.0, 0.963, 1.061, 0.926, 0.926,
0.33999999999999997, 0.11099999999999999, 1.006, 0.943]
SD: [0.026999999999999996, 0, 0.078, 0.315, 0.21299999999999997, 0.27,
0.21599999999999997, 0.10799999999999998, 0.06, 0.171, 0, 0.096, 0.078,
0.09, 0.039, 0.078, 0.171, 0, 0, 0, 0, 0.44399999999999995, 0,
0.08700000000000001, 0.056999999999999995, 0.252, 0.183, 0.09, 0.129,
0.051000000000000004, 0.056999999999999995, 0, 0.10500000000000001,
0.07200000000000001, 0.015, 0.069, 0, 0.069, 0.372,
0.10200000000000001, 0.012, 0.07200000000000001, 0.099, 0.003, 0.084]
#settings for new mutations
mut_rate: 1.0e-6
#to A G T C from
subs_matrix: [[0.25, 0.50, 0.75, 1.], #A
[0.25, 0.50, 0.75, 1.], #G
[0.25, 0.50, 0.75, 1.], #T
[0.25, 0.50, 0.75, 1.]] #C
#sequence settings
seq_len: 5386
basedist: [0.25,0.5,0.75,1] #A,G,T,C
#simulation settings
n_seq_init: 1000000
R0: 100
ga_increase: 1
max_pop: 1000000 #0.8e+9
name: 'phiX174'
#settings for the MFED
model: 'exponential'
parameters:
fb: 1
lb: 0.01
# fl: 0.045
# mu: -0.248
# sigma: 0.149
#settings for new mutations
mut_rate: 0.01
subs_matrix: [[0.54505451, 0.79257926, 0.90629063, 1.],
[0.38363836, 0.81938194, 0.90849085, 1.],
[0.21922192, 0.33003300, 0.75747575, 1.],
[0.18228177, 0.29717028, 0.54194581, 1.]]
#sequence settings
seq_len: 10
basedist: [ 0.37, 0.61, 0.8 , 1] #A,G,T,C
#simulation settings
n_seq_init: 1
R0: 1000
ga_increase: 1
max_pop: 10000
name: 'small'
# seq_sim - simulations of viral diversification
## installation
First, make sure you have a working python installation (2.7), with the following packages installed
First, make sure you have a working python installation (2.7/3.6), with the following packages installed
* numpy
* scipy
* pyyaml
* progressbar
* Tqdm
* Ete3
* Ipython
* matplotlib
* six
Currently, I have not managed to find an easy way to install/manage packages, sorry!
To install, you'll have to manually put the `seq_sim` folder somewhere in your `PYTHONPATH`
......@@ -48,7 +46,7 @@ see `help(seq_sim.Simulation)` for all possible settings
sim.current_gen.stats()
# sim_scripts - simulation of specific scenarios using seq_sim
# Scenarios - simulation of specific scenarios using SeqSimEvo
## The gui
### Installation
Install dependencies:
......
from seq_sim.simulation import *