def test_memory():
    """Evolve the complexation system repeatedly while watching process memory.

    The resident set size (RSS) of the current process is sampled before
    every call to ``evolve``.  Each time the RSS exceeds the largest value
    seen so far the growth is printed and counted; the test passes when
    at most one such growth was observed over all runs.
    """
    stoichiometric_matrix, rates, initial_state, final_state = (
        load_complexation())
    duration = 1
    amplify = 100

    process = psutil.Process(os.getpid())
    previous_rss = process.memory_info().rss
    memory_increases = 0
    print('initial memory use: {}'.format(previous_rss))

    seed = np.random.randint(2**31)
    system = StochasticSystem(stoichiometric_matrix, random_seed=seed)

    obsidian_start = seconds_since_epoch()
    for iteration in range(1, amplify + 1):
        current_rss = process.memory_info().rss
        if current_rss > previous_rss:
            print('memory use before iteration {:2d}: {}'.format(
                iteration, current_rss))
            previous_rss = current_rss
            memory_increases += 1

        result = system.evolve(duration, initial_state, rates)

        # Report (but do not fail on) any deviation from the stored outcome.
        mismatch = np.abs(final_state - result['outcome']).sum()
        if mismatch:
            print('difference is {}'.format(mismatch))
    obsidian_end = seconds_since_epoch()

    elapsed = obsidian_end - obsidian_start
    print('obsidian C implementation elapsed seconds for {} runs: {}'.format(
        amplify, elapsed))

    assert memory_increases <= 1
def test_complexation():
    """Evolve the complexation fixture and report how far it is from the stored final state.

    Loads ``initial_state.json``, ``final_state.json`` and the sparse
    ``stoichiometry.json`` from ``data/complexation``, expands the sparse
    stoichiometry into a dense ``(n_metabolites, n_reactions)`` integer
    matrix, and evolves the system for one unit of time with every rate
    set to 10.

    Returns:
        The ``(time, counts, events)`` tuple produced by ``evolve``.
    """
    fixtures_root = os.path.join('data', 'complexation')

    def load_state(filename):
        # Read one JSON fixture file into a numpy array.
        with open(os.path.join(fixtures_root, filename)) as f:
            state = np.array(json.load(f))

        return state

    initial_state = load_state('initial_state.json')
    final_state = load_state('final_state.json')

    assert initial_state.size == final_state.size
    n_metabolites = initial_state.size

    with open(os.path.join(fixtures_root, 'stoichiometry.json')) as f:
        stoichiometry_sparse = json.load(f)

    n_reactions = len(stoichiometry_sparse)

    stoichiometric_matrix = np.zeros((n_metabolites, n_reactions), np.int64)

    for (reaction_index, reaction_stoich) in enumerate(stoichiometry_sparse):
        # ``items()`` exists on both Python 2 and Python 3 dicts, whereas
        # ``viewitems()`` is Python 2 only.
        for (str_metabolite_index, stoich) in reaction_stoich.items():
            # JSON doesn't allow for integer keys...
            metabolite_index = int(str_metabolite_index)
            stoichiometric_matrix[metabolite_index, reaction_index] = stoich

    duration = 1

    # semi-quantitative rate constants
    rates = np.full(n_reactions, 10)

    system = StochasticSystem(stoichiometric_matrix, rates)

    time, counts, events = system.evolve(initial_state, duration)

    # One time point per event, plus the initial time point.
    assert(len(time)-1 == events.sum())

    outcome = counts[-1]
    difference = (final_state - outcome)

    total = np.abs(difference).sum()

    print('differences: {}'.format(total))
    print('total steps: {}'.format(len(time)))
    print(time)

    return (time, counts, events)
def test_hang():
    """Evolve a system loaded from saved arrays 7300 times with a fixed seed.

    Progress is printed every 100 iterations.  There are no assertions;
    the test passes if every call to ``evolve`` returns.
    """
    # TODO: Use a pytest plug-in to timeout after some threshold.
    seed = 807952948

    stoichiometry = np_load('stoich.npy')
    molecule_counts = np_load('complex-counts.npy')
    reaction_rates = np_load('rates.npy')

    system = StochasticSystem(stoichiometry, random_seed=seed)

    iteration = 0
    while iteration < 7300:
        if not iteration % 100:
            print(iteration)
        system.evolve(1, molecule_counts, reaction_rates)
        iteration += 1
def test_equilibration():
    """Evolve a small two-row system and check the total count drops.

    Starting from counts ``[1000, 0]`` the system is evolved for 10 time
    units.  The final counts must sum to less than the initial counts,
    and the last recorded time must not exceed the duration.

    Returns:
        The ``(time, counts, events)`` tuple produced by ``evolve``.
    """
    stoichiometry = np.array([
        [-1, 1, 0],
        [1, -1, -1]])
    reaction_rates = np.array([10, 10, 0.1])
    system = StochasticSystem(stoichiometry, reaction_rates)

    initial_counts = np.array([1000, 0])
    duration = 10

    time, counts, events = system.evolve(initial_counts, duration)

    final_counts = counts[-1]
    assert final_counts.sum() < initial_counts.sum()
    assert time[-1] <= duration

    return (time, counts, events)
def test_dimerization():
    """Evolve a four-row, three-column system for one time unit.

    The only check is that the last recorded time does not exceed the
    requested duration.

    Returns:
        The ``(time, counts, events)`` tuple produced by ``evolve``.
    """
    stoichiometry = np.array([
        [-1, -2, 1],
        [-1, 0, 1],
        [1, 0, -1],
        [0, 1, 0]])
    reaction_rates = np.array([3, 1, 1]) * 0.01
    system = StochasticSystem(stoichiometry, reaction_rates)

    initial_counts = np.array([1000, 1000, 0, 0])
    duration = 1

    time, counts, events = system.evolve(initial_counts, duration)

    assert time[-1] <= duration

    return (time, counts, events)
def test_flagella():
    """Evolve a six-reaction system once and print the result.

    The stoichiometry has one row per reaction (6) and one column per
    substrate (36).  There are no assertions; the test passes if
    ``evolve`` returns.
    """
    reaction_rows = [
        [ 0, 0, 0, 0, 0, -4, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 ],
        [ -26, -34, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 ],
        [ 0, 0, 0, -1, -1, 0, 0, -1, -1, -1, -1, -1, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 ],
        [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1, 0 ],
        [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 1 ],
        [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -5, -120, 0, 0, 1, 0, -1, -1 ],
    ]
    stoichiometry = np.array(reaction_rows)

    initial_counts = [
        21, 1369, 69, 4, 1, 1674, 0, 48, 53, 49, 61, 7, 3, 28, 151, 0, 26, 20, 3, 3, 9, 16, 7, 26, 280, 26, 64, 67, 310, 13559, 208, 16, 22, 0, 17, 0 ]
    substrate = np.array(initial_counts, np.int64)

    # Every reaction shares the same rate constant.
    rates = np.array([1.e-05] * 6)

    system = StochasticSystem(stoichiometry)
    outcome = system.evolve(1.0, substrate, rates)

    print('flagella result: {}'.format(outcome))
def test_obsidian():
    """Evolve a three-reaction, four-substrate system and print every result field.

    Also checks that the wrapped ``obsidian`` object reports reaction and
    substrate counts matching the two dimensions of the stoichiometric
    matrix.

    Returns:
        The result mapping produced by ``evolve`` (keys ``steps``,
        ``time``, ``events``, ``occurrences`` and ``outcome`` are read
        here).
    """
    stoichiometric_matrix = np.array([
        [1, 1, -1, 0],
        [-2, 0, 0, 1],
        [-1, -1, 1, 0]], np.int64)
    rates = np.array([3, 1, 1]) * 0.01
    initial_counts = np.array([50, 20, 30, 40], np.int64)

    system = StochasticSystem(stoichiometric_matrix)
    result = system.evolve(1.0, initial_counts, rates)

    print('steps: {}'.format(result['steps']))
    print('time: {}'.format(result['time']))
    print('events: {}'.format(result['events']))
    print('occurrences: {}'.format(result['occurrences']))
    print('outcome: {}'.format(result['outcome']))

    # Rows are reactions, columns are substrates.
    expected_reactions, expected_substrates = stoichiometric_matrix.shape
    assert system.obsidian.reactions_count() == expected_reactions
    assert system.obsidian.substrates_count() == expected_substrates

    return result
def test_pickle():
    """Check that a pickled-and-restored system reproduces ``test_obsidian``.

    A ``StochasticSystem`` is serialized with :py:mod:`pickle`, restored,
    and the *restored* copy is evolved from the same initial counts and
    rates that ``test_obsidian`` uses.  Every field of the two results
    must match.

    NOTE(review): the comparison assumes the pickle round trip preserves
    whatever determines the random stream (e.g. the seed) — confirm
    against ``StochasticSystem``'s pickling support.
    """
    stoichiometric_matrix = np.array([
        [1, 1, -1, 0],
        [-2, 0, 0, 1],
        [-1, -1, 1, 0]], np.int64)

    rates = np.array([3, 1, 1]) * 0.01

    arrow = StochasticSystem(stoichiometric_matrix)

    pickled_arrow = pickle.dumps(arrow)
    unpickled_arrow = pickle.loads(pickled_arrow)

    # Evolve the restored object: evolving the original ``arrow`` here
    # would leave the pickle round trip completely untested.
    result = unpickled_arrow.evolve(
        1.0, np.array([50, 20, 30, 40], np.int64), rates)

    straight = test_obsidian()

    assert (result['steps'] == straight['steps'])
    assert ((result['time'] == straight['time']).all())
    assert ((result['events'] == straight['events']).all())
    assert ((result['occurrences'] == straight['occurrences']).all())
    assert ((result['outcome'] == straight['outcome']).all())

    print('arrow object pickled is {} bytes'.format(len(pickled_arrow)))
def test_compare_runtime():
    """Time 100 runs of the reference and of the obsidian implementation.

    Both implementations evolve the complexation system loaded by
    ``load_complexation``; the elapsed seconds of each are printed.
    There are no assertions.
    """
    stoichiometric_matrix, rates, initial_state, final_state = (
        load_complexation())
    duration = 1
    amplify = 100

    def timed_runs(simulator):
        # Seconds taken by ``amplify`` consecutive evolve() calls.
        started = seconds_since_epoch()
        for _ in range(amplify):
            simulator.evolve(duration, initial_state, rates)
        finished = seconds_since_epoch()
        return finished - started

    reference = GillespieReference(stoichiometric_matrix)
    reference_elapsed = timed_runs(reference)

    system = StochasticSystem(stoichiometric_matrix)
    obsidian_elapsed = timed_runs(system)

    print('reference Python implementation elapsed seconds: {}'.format(
        reference_elapsed))
    print('obsidian C implementation elapsed seconds: {}'.format(
        obsidian_elapsed))
def __init__(self, initial_parameters=None):
    """Configure the complexation process and build its stochastic system.

    Args:
        initial_parameters: Optional dictionary of configuration
            overrides; a falsy value is treated as an empty dictionary.
            After the parent initializer and ``derive_defaults`` have
            run, ``self.parameters`` must provide ``monomer_ids``,
            ``complex_ids``, ``reaction_ids``, ``stoichiometry`` and
            ``rates``.
    """
    initial_parameters = initial_parameters or {}

    super(Complexation, self).__init__(initial_parameters)
    self.derive_defaults('stoichiometry', 'reaction_ids', keys_list)

    # Mirror the resolved parameters as attributes of the same name.
    for name in (
            'monomer_ids',
            'complex_ids',
            'reaction_ids',
            'stoichiometry',
            'rates'):
        setattr(self, name, self.parameters[name])

    stoichiometry_and_rates = build_complexation_stoichiometry(
        self.stoichiometry,
        self.rates,
        self.reaction_ids,
        self.monomer_ids,
        self.complex_ids)
    (self.complexation_stoichiometry,
     self.complexation_rates) = stoichiometry_and_rates

    self.complexation = StochasticSystem(self.complexation_stoichiometry)

    self.mass_deriver_key = self.or_default(
        initial_parameters, 'mass_deriver_key')
def test_get_set_random_state():
    """Check that restoring a saved random state replays the same trajectory.

    After one warm-up run the random state is captured and two further
    runs are made; those two must differ in at least one compared field
    before the loop reaches its end.  The captured state is then
    restored and one more run must reproduce the first of the two
    exactly.
    """
    compared_keys = ('time', 'events', 'occurrences', 'outcome')

    stoich = np.array([
        [1, 1, -1, 0],
        [-2, 0, 0, 1],
        [-1, -1, 1, 0]])
    system = StochasticSystem(stoich)

    state = np.array([1000, 1000, 0, 0])
    rates = np.array([3.0, 1.0, 1.0])

    # Warm-up run before the random state is captured.
    system.evolve(1, state, rates)
    saved_random_state = system.obsidian.get_random_state()

    first_run = system.evolve(1, state, rates)
    second_run = system.evolve(1, state, rates)

    # Consecutive runs are expected to diverge.
    with np.testing.assert_raises(AssertionError):
        for key in compared_keys:
            np.testing.assert_array_equal(first_run[key], second_run[key])

    system.obsidian.set_random_state(*saved_random_state)
    replayed_run = system.evolve(1, state, rates)

    for key in compared_keys:
        np.testing.assert_array_equal(first_run[key], replayed_run[key])
class Translation(Process):
    """Stochastic translation process.

    Ribosome initiation is simulated with a ``StochasticSystem`` and
    elongation with ``Elongation``.  See :py:meth:`__init__` for the
    configuration options and a usage example.
    """

    name = 'translation'

    defaults = {
        'sequences': {
            ('oA', 'eA'): A,
            ('oAZ', 'eA'): A,
            ('oAZ', 'eZ'): Z,
            ('oB', 'eB'): B,
            ('oBY', 'eB'): B,
            ('oBY', 'eY'): Y},

        'templates': {
            ('oA', 'eA'): generate_template(('oA', 'eA'), 20, ['eA']),
            ('oAZ', 'eA'): generate_template(('oAZ', 'eA'), 20, ['eA']),
            ('oAZ', 'eZ'): generate_template(('oAZ', 'eZ'), 60, ['eZ']),
            ('oB', 'eB'): generate_template(('oB', 'eB'), 30, ['eB']),
            ('oBY', 'eB'): generate_template(('oBY', 'eB'), 30, ['eB']),
            ('oBY', 'eY'): generate_template(('oBY', 'eY'), 40, ['eY'])},

        'transcript_affinities': {
            ('oA', 'eA'): 1.0,
            ('oAZ', 'eA'): 2.0,
            ('oAZ', 'eZ'): 5.0,
            ('oB', 'eB'): 1.0,
            ('oBY', 'eB'): 2.0,
            ('oBY', 'eY'): 5.0},

        'elongation_rate': 5.0,
        'polymerase_occlusion': 10,
        'symbol_to_monomer': amino_acids,
        'monomer_ids': monomer_ids,
        'concentration_keys': [],

        'mass_deriver_key': 'mass_deriver',
        'concentrations_deriver_key': 'translation_concentrations',
        'time_step': 1.0,
    }

    def __init__(self, initial_parameters=None):
        '''A stochastic translation model

        .. WARNING:: Vivarium's knowledge base uses the gene name to
            name the protein. This means that for a gene acrA that
            codes for protein ArcA, you must refer to the gene,
            transcript, and protein each as acrA.

        .. DANGER:: This documentation will need to be updated to
            reflect the changes in `#185
            <https://github.com/CovertLab/vivarium/pull/185>`_

        :term:`Ports`:

        * **ribosomes**: Expects the ``ribosomes`` variable, whose
          value is a list of the configurations of the ribosomes
          currently active.
        * **molecules**: Expects variables for each free monomer in
          ``monomer_ids``. The update also reports changes to ``ATP``
          and ``ADP``.
        * **transcripts**: Expects variables for each transcript to
          translate. Translation will read transcripts from this port.
        * **proteins**: Expects variables for each protein product. The
          produced proteins will be added to this port as counts.
        * **concentrations**: Expects variables for each key in
          ``concentration_keys``. This will be used by a
          :term:`deriver` to convert counts to concentrations.

        Arguments:
            initial_parameters: A dictionary of configuration options.
                Accepts the following keys:

                * **sequences** (:py:class:`dict`): Maps from operon
                  name to the RNA sequence of the operon, as a
                  :py:class:`str`.
                * **templates** (:py:class:`dict`): Maps from the name
                  of a transcript to a :term:`template specification`.
                  The template specification may be generated by
                  :py:func:`cell.library.polymerize.generate_template`
                  like so:

                  >>> from vivarium_cell.library.polymerize import (
                  ...     generate_template)
                  >>> from vivarium.library.pretty import format_dict
                  >>> terminator_index = 5
                  >>> template = generate_template(
                  ...     'oA', terminator_index, ['product1'])
                  >>> print(format_dict(template))
                  {
                      "direction": 1,
                      "id": "oA",
                      "position": 0,
                      "sites": [],
                      "terminators": [
                          {
                              "position": 5,
                              "products": [
                                  "product1"
                              ],
                              "strength": 1.0
                          }
                      ]
                  }

                * **transcript_affinities** (:py:class:`dict`): A map
                  from the name of a transcript to the binding affinity
                  (a :py:class:`float`) of the ribosome for the
                  transcript.
                * **elongation_rate** (:py:class:`float`): The
                  elongation rate of the ribosome.

                  .. todo:: Units of elongation rate

                * **polymerase_occlusion** (:py:class:`int`): The number
                  of base pairs behind the polymerase where another
                  polymerase is occluded and so cannot bind.
                * **symbol_to_monomer** (:py:class:`dict`): Maps from
                  the symbols used to represent monomers in the RNA
                  sequence to the name of the free monomer. This should
                  generally be
                  :py:data:`cell.data.amino_acids.amino_acids`.
                * **monomer_ids** (:py:class:`list`): A list of the
                  names of the free monomers consumed by translation.
                  This can generally be computed as:

                  >>> import pprint
                  >>>
                  >>> from vivarium_cell.data.amino_acids import amino_acids
                  >>> monomer_ids = amino_acids.values()
                  >>> pp = pprint.PrettyPrinter()
                  >>> pp.pprint(list(monomer_ids))
                  ['Alanine',
                   'Arginine',
                   'Asparagine',
                   'Aspartate',
                   'Cysteine',
                   'Glutamate',
                   'Glutamine',
                   'Glycine',
                   'Histidine',
                   'Isoleucine',
                   'Leucine',
                   'Lysine',
                   'Methionine',
                   'Phenylalanine',
                   'Proline',
                   'Serine',
                   'Threonine',
                   'Tryptophan',
                   'Tyrosine',
                   'Valine']

                  Note that we only included the `list()`
                  transformation to make the output prettier. The
                  `dict_values` object returned by `.values()` is
                  sufficiently list-like for use here. Also note that
                  :py:mod:`pprint` just makes the output prettier.
                * **concentration_keys** (:py:class:`list`): A list of
                  variables you want to be able to access as
                  concentrations from the *concentrations* port. The
                  actual conversion is handled by a deriver.

        Example configuring the process (uses
        :py:func:`vivarium.library.pretty.format_dict`):

        >>> from vivarium.library.pretty import format_dict
        >>> from vivarium_cell.data.amino_acids import amino_acids
        >>> from vivarium_cell.library.polymerize import generate_template
        >>> random.seed(0)  # Needed because process is stochastic
        >>> np.random.seed(0)
        >>> configurations = {
        ...     'sequences': {
        ...         ('oA', 'eA'): 'AWDPT',
        ...         ('oAZ', 'eZ'): 'YVEGELENGGMFISC',
        ...     },
        ...     'templates': {
        ...         ('oA', 'eA'): generate_template(('oA', 'eA'), 5, ['eA']),
        ...         ('oAZ', 'eZ'): generate_template(('oAZ', 'eZ'), 15, ['eA', 'eZ']),
        ...     },
        ...     'transcript_affinities': {
        ...         ('oA', 'eA'): 1.0,
        ...         ('oAZ', 'eZ'): 1.0,
        ...     },
        ...     'elongation_rate': 10.0,
        ...     'polymerase_occlusion': 10,
        ...     'symbol_to_monomer': amino_acids,
        ...     'monomer_ids': amino_acids.values(),
        ...     'concentration_keys': []
        ... }
        >>> # make the translation process, and initialize the states
        >>> translation = Translation(configurations)  # doctest:+ELLIPSIS
        >>> states = {
        ...     'ribosomes': {},
        ...     'molecules': {},
        ...     'proteins': {UNBOUND_RIBOSOME_KEY: 2},
        ...     'transcripts': {
        ...         'oA': 10,
        ...         'oAZ': 10,
        ...     }
        ... }
        >>> states['molecules'].update(
        ...     {
        ...         molecule_id: 100
        ...         for molecule_id in translation.monomer_ids
        ...     }
        ... )
        >>> update = translation.next_update(1, states)
        >>> print(update['ribosomes'])
        {'_add': [{'path': (1,), 'state': <class 'vivarium_cell.processes.translation.Ribosome'>: {'id': 1, 'state': 'occluding', 'position': 9, 'template': ('oAZ', 'eZ'), 'template_index': 0, 'terminator': 0}}, {'path': (2,), 'state': <class 'vivarium_cell.processes.translation.Ribosome'>: {'id': 2, 'state': 'occluding', 'position': 9, 'template': ('oAZ', 'eZ'), 'template_index': 0, 'terminator': 0}}], '_delete': []}
        '''

        if not initial_parameters:
            initial_parameters = {}

        self.monomer_symbols = list(amino_acids.keys())
        self.monomer_ids = list(amino_acids.values())

        # Derived defaults depend on the (possibly overridden) templates
        # and transcript affinities, so they are computed per instance.
        self.default_parameters = copy.deepcopy(self.defaults)

        templates = self.or_default(initial_parameters, 'templates')

        self.default_parameters['protein_ids'] = all_products({
            key: Template(config)
            for key, config in templates.items()})

        self.default_parameters['transcript_order'] = list(
            initial_parameters.get(
                'transcript_affinities',
                self.default_parameters['transcript_affinities']).keys())
        self.default_parameters['molecule_ids'] = self.monomer_ids

        self.parameters = copy.deepcopy(self.default_parameters)
        self.parameters.update(initial_parameters)

        self.sequences = self.parameters['sequences']
        self.templates = self.parameters['templates']

        self.transcript_affinities = self.parameters['transcript_affinities']
        self.operons = gather_genes(self.transcript_affinities)
        self.operon_order = list(self.operons.keys())
        self.transcript_order = self.parameters['transcript_order']
        self.transcript_count = len(self.transcript_order)

        self.monomer_ids = self.parameters['monomer_ids']

        # Copy before appending: ``update`` above is shallow, so a
        # caller-supplied 'molecule_ids' list would otherwise be mutated
        # in place (and grow again on every construction).  The extended
        # list is written back so ``self.parameters`` still exposes it.
        self.molecule_ids = list(self.parameters['molecule_ids'])
        self.molecule_ids.extend(['ATP', 'ADP'])
        self.parameters['molecule_ids'] = self.molecule_ids

        self.protein_ids = self.parameters['protein_ids']
        self.symbol_to_monomer = self.parameters['symbol_to_monomer']
        self.elongation = 0
        self.elongation_rate = self.parameters['elongation_rate']
        self.polymerase_occlusion = self.parameters['polymerase_occlusion']
        self.concentration_keys = self.parameters['concentration_keys']

        self.affinity_vector = np.array([
            self.transcript_affinities[transcript_key]
            for transcript_key in self.transcript_order], dtype=np.float64)

        self.stoichiometry = build_stoichiometry(self.transcript_count)

        self.initiation = StochasticSystem(self.stoichiometry)

        self.ribosome_id = 0

        self.protein_keys = self.concentration_keys + self.protein_ids
        self.all_protein_keys = self.protein_keys + [UNBOUND_RIBOSOME_KEY]

        self.mass_deriver_key = self.or_default(
            initial_parameters, 'mass_deriver_key')
        self.concentrations_deriver_key = self.or_default(
            initial_parameters, 'concentrations_deriver_key')

        log.info('translation parameters: {}'.format(self.parameters))

        super(Translation, self).__init__(self.parameters)

    def ports_schema(self):
        """Return the schema for every port this process reads or writes.

        Molecule, transcript and protein entries get a ``_properties``
        dictionary; when the key is present in ``molecular_weight`` its
        weight is stored there under ``mw``.
        """

        def add_mass(schema, masses, key):
            # Ensure '_properties' exists and record the weight if known.
            if '_properties' not in schema:
                schema['_properties'] = {}
            if key in masses:
                schema['_properties']['mw'] = masses[key]
            return schema

        return {
            'ribosomes': {
                '*': {
                    'id': {
                        '_default': -1,
                        '_updater': 'set'},
                    'domain': {
                        '_default': 0,
                        '_updater': 'set'},
                    'state': {
                        '_default': None,
                        '_updater': 'set',
                        '_emit': True},
                    'position': {
                        '_default': 0,
                        '_updater': 'set',
                        '_emit': True},
                    'template': {
                        '_default': None,
                        '_updater': 'set',
                        '_emit': True},
                    'template_index': {
                        '_default': 0,
                        '_updater': 'set',
                        '_emit': True}}},

            'global': {},

            'molecules': {
                molecule: add_mass({
                    '_emit': True,
                    '_default': 0,
                    '_divider': 'split'}, molecular_weight, molecule)
                for molecule in self.molecule_ids},

            'transcripts': {
                transcript: add_mass({
                    '_default': 0,
                    '_divider': 'split'}, molecular_weight, transcript)
                for transcript in list(self.operons.keys())},

            'proteins': {
                protein: add_mass({
                    '_default': 0,
                    '_divider': 'split',
                    '_emit': True}, molecular_weight, protein)
                for protein in self.all_protein_keys},

            'concentrations': {
                molecule: {
                    '_default': 0.0,
                    '_updater': 'set'}
                for molecule in self.protein_keys}}

    def derivers(self):
        """Return the deriver configurations keyed by their configured names."""
        return {
            self.mass_deriver_key: {
                'deriver': 'mass_deriver',
                'port_mapping': {
                    'global': 'global'}},
            self.concentrations_deriver_key: {
                'deriver': 'concentrations_deriver',
                'port_mapping': {
                    'global': 'global',
                    'counts': 'proteins',
                    'concentrations': 'concentrations'},
                'config': {
                    'concentration_keys': self.protein_keys}}}

    def next_update(self, timestep, states):
        """Simulate initiation and elongation for ``timestep`` and return the update.

        Args:
            timestep: Length of simulated time to advance.
            states: Mapping with the ``molecules``, ``transcripts``,
                ``proteins`` and ``ribosomes`` port states.

        Returns:
            A dictionary with three keys:

            * ``ribosomes``: the continuing ribosomes keyed by id, plus
              ``_add`` (newly bound ribosomes) and ``_delete``
              (ribosomes present at the start that are gone at the end).
            * ``molecules``: negated counts from ``elongation.monomers``
              plus the ``ATP``/``ADP`` changes.
            * ``proteins``: the change in unbound ribosomes plus
              ``elongation.complete_polymers``.
        """
        molecules = states['molecules']
        transcripts = states['transcripts']
        proteins = states['proteins']

        ribosomes = {
            ribosome_id: Ribosome(ribosome)
            for ribosome_id, ribosome in states['ribosomes'].items()}

        # Snapshot the ids now.  ``dict.keys()`` is a live view on
        # Python 3, so without the copy any ribosome added to this dict
        # below would later be mistaken for an original one and never
        # be reported under '_add'.
        original_ribosome_keys = set(ribosomes)

        gene_counts = np.array(
            list(transcripts_to_gene_counts(
                transcripts, self.operons).values()),
            dtype=np.int64)

        # Find out how many transcripts are currently blocked by a
        # newly initiated ribosome
        bound_transcripts = np.zeros(self.transcript_count, dtype=np.int64)
        ribosomes_by_transcript = {
            transcript_key: []
            for transcript_key in self.transcript_order}
        for ribosome in ribosomes.values():
            ribosomes_by_transcript[ribosome.template].append(ribosome)
        for index, transcript in enumerate(self.transcript_order):
            bound_transcripts[index] = len([
                ribosome
                for ribosome in ribosomes_by_transcript[transcript]
                if ribosome.is_bound()])

        # Make the state for a gillespie simulation out of total number of each
        # transcript not blocked by a bound ribosome, concatenated with the number
        # of each transcript that is bound by a ribosome.
        # These are the two states for each transcript the simulation
        # will operate on, essentially going back and forth between
        # bound and unbound states.

        original_unbound_ribosomes = proteins[UNBOUND_RIBOSOME_KEY]
        monomer_limits = {
            monomer: molecules[monomer]
            for monomer in self.monomer_ids}
        unbound_ribosomes = original_unbound_ribosomes

        templates = {
            key: Template(template)
            for key, template in self.templates.items()}

        elapsed = 0
        elongation = Elongation(
            self.sequences,
            templates,
            monomer_limits,
            self.symbol_to_monomer,
            self.elongation)

        while elapsed < timestep:
            # build the state vector for the gillespie simulation
            substrate = np.concatenate([
                gene_counts - bound_transcripts,
                bound_transcripts,
                [unbound_ribosomes]])

            # find number of monomers until next terminator
            distance = 1 / self.elongation_rate

            # find interval of time that elongates to the point of the next terminator
            interval = min(distance, timestep - elapsed)

            if interval == distance:
                # perform the elongation until the next event
                terminations, monomer_limits, ribosomes = elongation.step(
                    interval,
                    monomer_limits,
                    ribosomes)
                unbound_ribosomes += terminations
            else:
                elongation.store_partial(interval)

            # run simulation for interval of time to next terminator
            result = self.initiation.evolve(
                interval,
                substrate,
                self.affinity_vector)

            # go through each event in the simulation and update the state
            for _event_time, event in zip(result['time'], result['events']):
                # ribosome has bound the transcript
                transcript_key = self.transcript_order[event]
                bound_transcripts[event] += 1

                self.ribosome_id += 1
                new_ribosome = Ribosome({
                    'id': self.ribosome_id,
                    'template': transcript_key,
                    'position': 0})
                new_ribosome.bind()
                new_ribosome.start_polymerizing()
                ribosomes[new_ribosome.id] = new_ribosome

                unbound_ribosomes -= 1

            # deal with occluding rnap
            for ribosome in ribosomes.values():
                if ribosome.is_unoccluding(self.polymerase_occlusion):
                    bound_transcripts[ribosome.template_index] -= 1
                    ribosome.unocclude()

            elapsed += interval

        # track how far elongation proceeded to start from next iteration
        self.elongation = elongation.elongation - int(elongation.elongation)

        proteins = {
            UNBOUND_RIBOSOME_KEY: unbound_ribosomes - original_unbound_ribosomes}
        proteins.update(elongation.complete_polymers)

        molecules = {
            key: count * -1
            for key, count in elongation.monomers.items()}

        original = original_ribosome_keys
        current = set(ribosomes.keys())
        bound_ribosomes = current - original
        completed_ribosomes = original - current
        continuing_ribosomes = original - completed_ribosomes

        # ATP hydrolysis cost is 2 per amino acid elongation
        molecules['ATP'] = 0
        molecules['ADP'] = 0
        for count in elongation.monomers.values():
            molecules['ATP'] -= 2 * count
            molecules['ADP'] += 2 * count

        ribosome_updates = {
            ribosome_id: ribosomes[ribosome_id]
            for ribosome_id in continuing_ribosomes}

        add_ribosomes = [
            {'path': (bound,), 'state': ribosomes[bound]}
            for bound in bound_ribosomes]

        delete_ribosomes = [
            (completed,)
            for completed in completed_ribosomes]

        ribosome_updates['_add'] = add_ribosomes
        ribosome_updates['_delete'] = delete_ribosomes

        update = {
            'ribosomes': ribosome_updates,
            'molecules': molecules,
            'proteins': proteins}

        return update
def __init__(self, initial_parameters=None):
    '''A stochastic translation model

    .. WARNING:: Vivarium's knowledge base uses the gene name to
        name the protein. This means that for a gene acrA that
        codes for protein ArcA, you must refer to the gene,
        transcript, and protein each as acrA.

    .. DANGER:: This documentation will need to be updated to
        reflect the changes in `#185
        <https://github.com/CovertLab/vivarium/pull/185>`_

    :term:`Ports`:

    * **ribosomes**: Expects the ``ribosomes`` variable, whose
      value is a list of the configurations of the ribosomes
      currently active.
    * **molecules**: Expects variables for each of the free monomers
      consumed by translation (see ``monomer_ids``).
    * **transcripts**: Expects variables for each transcript to
      translate. Translation will read transcripts from this port.
    * **proteins**: Expects variables for each protein product. The
      produced proteins will be added to this port as counts.
    * **concentrations**: Expects variables for each key in
      ``concentration_keys``. This will be used by a
      :term:`deriver` to convert counts to concentrations.

    Arguments:
        initial_parameters: A dictionary of configuration options.
            Accepts the following keys:

            * **sequences** (:py:class:`dict`): Maps from operon
              name to the RNA sequence of the operon, as a
              :py:class:`str`.
            * **templates** (:py:class:`dict`): Maps from the name
              of a transcript to a :term:`template specification`.
              The template specification may be generated by
              :py:func:`cell.library.polymerize.generate_template`
              like so:

              >>> from vivarium_cell.library.polymerize import (
              ...     generate_template)
              >>> from vivarium.library.pretty import format_dict
              >>> terminator_index = 5
              >>> template = generate_template(
              ...     'oA', terminator_index, ['product1'])
              >>> print(format_dict(template))
              {
                  "direction": 1,
                  "id": "oA",
                  "position": 0,
                  "sites": [],
                  "terminators": [
                      {
                          "position": 5,
                          "products": [
                              "product1"
                          ],
                          "strength": 1.0
                      }
                  ]
              }

            * **transcript_affinities** (:py:class:`dict`): A map
              from the name of a transcript to the binding affinity
              (a :py:class:`float`) of the ribosome for the
              transcript.
            * **elongation_rate** (:py:class:`float`): The
              elongation rate of the ribosome.

              .. todo:: Units of elongation rate

            * **polymerase_occlusion** (:py:class:`int`): The number
              of base pairs behind the polymerase where another
              polymerase is occluded and so cannot bind.
            * **symbol_to_monomer** (:py:class:`dict`): Maps from
              the symbols used to represent monomers in the RNA
              sequence to the name of the free monomer. This should
              generally be
              :py:data:`cell.data.amino_acids.amino_acids`.
            * **monomer_ids** (:py:class:`list`): A list of the
              names of the free monomers consumed by translation.
              This can generally be computed as:

              >>> import pprint
              >>>
              >>> from vivarium_cell.data.amino_acids import amino_acids
              >>> monomer_ids = amino_acids.values()
              >>> pp = pprint.PrettyPrinter()
              >>> pp.pprint(list(monomer_ids))
              ['Alanine',
               'Arginine',
               'Asparagine',
               'Aspartate',
               'Cysteine',
               'Glutamate',
               'Glutamine',
               'Glycine',
               'Histidine',
               'Isoleucine',
               'Leucine',
               'Lysine',
               'Methionine',
               'Phenylalanine',
               'Proline',
               'Serine',
               'Threonine',
               'Tryptophan',
               'Tyrosine',
               'Valine']

              Note that we only included the `list()`
              transformation to make the output prettier. The
              `dict_values` object returned by `.values()` is
              sufficiently list-like for use here. Also note that
              :py:mod:`pprint` just makes the output prettier.
            * **concentration_keys** (:py:class:`list`): A list of
              variables you want to be able to access as
              concentrations from the *concentrations* port. The
              actual conversion is handled by a deriver.

    Example configuring the process (uses
    :py:func:`vivarium.library.pretty.format_dict`):

    >>> from vivarium.library.pretty import format_dict
    >>> from vivarium_cell.data.amino_acids import amino_acids
    >>> from vivarium_cell.library.polymerize import generate_template
    >>> random.seed(0)  # Needed because process is stochastic
    >>> np.random.seed(0)
    >>> configurations = {
    ...     'sequences': {
    ...         ('oA', 'eA'): 'AWDPT',
    ...         ('oAZ', 'eZ'): 'YVEGELENGGMFISC',
    ...     },
    ...     'templates': {
    ...         ('oA', 'eA'): generate_template(('oA', 'eA'), 5, ['eA']),
    ...         ('oAZ', 'eZ'): generate_template(('oAZ', 'eZ'), 15, ['eA', 'eZ']),
    ...     },
    ...     'transcript_affinities': {
    ...         ('oA', 'eA'): 1.0,
    ...         ('oAZ', 'eZ'): 1.0,
    ...     },
    ...     'elongation_rate': 10.0,
    ...     'polymerase_occlusion': 10,
    ...     'symbol_to_monomer': amino_acids,
    ...     'monomer_ids': amino_acids.values(),
    ...     'concentration_keys': []
    ... }
    >>> # make the translation process, and initialize the states
    >>> translation = Translation(configurations)  # doctest:+ELLIPSIS
    >>> states = {
    ...     'ribosomes': {},
    ...     'molecules': {},
    ...     'proteins': {UNBOUND_RIBOSOME_KEY: 2},
    ...     'transcripts': {
    ...         'oA': 10,
    ...         'oAZ': 10,
    ...     }
    ... }
    >>> states['molecules'].update(
    ...     {
    ...         molecule_id: 100
    ...         for molecule_id in translation.monomer_ids
    ...     }
    ... )
    >>> update = translation.next_update(1, states)
    >>> print(update['ribosomes'])
    {'_add': [{'path': (1,), 'state': <class 'vivarium_cell.processes.translation.Ribosome'>: {'id': 1, 'state': 'occluding', 'position': 9, 'template': ('oAZ', 'eZ'), 'template_index': 0, 'terminator': 0}}, {'path': (2,), 'state': <class 'vivarium_cell.processes.translation.Ribosome'>: {'id': 2, 'state': 'occluding', 'position': 9, 'template': ('oAZ', 'eZ'), 'template_index': 0, 'terminator': 0}}], '_delete': []}
    '''

    if not initial_parameters:
        initial_parameters = {}

    self.monomer_symbols = list(amino_acids.keys())
    self.monomer_ids = list(amino_acids.values())

    # Derived defaults depend on the (possibly overridden) templates
    # and transcript affinities, so they are computed per instance.
    self.default_parameters = copy.deepcopy(self.defaults)

    templates = self.or_default(initial_parameters, 'templates')

    self.default_parameters['protein_ids'] = all_products({
        key: Template(config)
        for key, config in templates.items()})

    self.default_parameters['transcript_order'] = list(
        initial_parameters.get(
            'transcript_affinities',
            self.default_parameters['transcript_affinities']).keys())
    self.default_parameters['molecule_ids'] = self.monomer_ids

    self.parameters = copy.deepcopy(self.default_parameters)
    self.parameters.update(initial_parameters)

    self.sequences = self.parameters['sequences']
    self.templates = self.parameters['templates']

    self.transcript_affinities = self.parameters['transcript_affinities']
    self.operons = gather_genes(self.transcript_affinities)
    self.operon_order = list(self.operons.keys())
    self.transcript_order = self.parameters['transcript_order']
    self.transcript_count = len(self.transcript_order)

    self.monomer_ids = self.parameters['monomer_ids']

    # Copy before appending: ``update`` above is shallow, so a
    # caller-supplied 'molecule_ids' list would otherwise be mutated
    # in place (and grow again on every construction).  The extended
    # list is written back so ``self.parameters`` still exposes it.
    self.molecule_ids = list(self.parameters['molecule_ids'])
    self.molecule_ids.extend(['ATP', 'ADP'])
    self.parameters['molecule_ids'] = self.molecule_ids

    self.protein_ids = self.parameters['protein_ids']
    self.symbol_to_monomer = self.parameters['symbol_to_monomer']
    self.elongation = 0
    self.elongation_rate = self.parameters['elongation_rate']
    self.polymerase_occlusion = self.parameters['polymerase_occlusion']
    self.concentration_keys = self.parameters['concentration_keys']

    self.affinity_vector = np.array([
        self.transcript_affinities[transcript_key]
        for transcript_key in self.transcript_order], dtype=np.float64)

    self.stoichiometry = build_stoichiometry(self.transcript_count)

    self.initiation = StochasticSystem(self.stoichiometry)

    self.ribosome_id = 0

    self.protein_keys = self.concentration_keys + self.protein_ids
    self.all_protein_keys = self.protein_keys + [UNBOUND_RIBOSOME_KEY]

    self.mass_deriver_key = self.or_default(
        initial_parameters, 'mass_deriver_key')
    self.concentrations_deriver_key = self.or_default(
        initial_parameters, 'concentrations_deriver_key')

    log.info('translation parameters: {}'.format(self.parameters))

    super(Translation, self).__init__(self.parameters)
class Complexation(Process):
    '''Process that evolves monomer and complex counts stochastically.

    The configured stoichiometry and rates are converted once, at
    construction time, into the arrays handed to a ``StochasticSystem``.
    Each call to :py:meth:`next_update` runs that system for one timestep
    and reports the change in every monomer and complex count.
    '''

    defaults = {
        'monomer_ids': chromosome.complexation_monomer_ids,
        'complex_ids': chromosome.complexation_complex_ids,
        'stoichiometry': chromosome.complexation_stoichiometry,
        'rates': chromosome.complexation_rates,
        'mass_deriver_key': 'mass_deriver'
    }

    def __init__(self, initial_parameters=None):
        '''Read the configuration and build the stochastic system.

        Arguments:
            initial_parameters: Optional dictionary overriding entries of
                ``defaults``. A falsy value is treated as an empty dict.
        '''
        initial_parameters = initial_parameters or {}

        super(Complexation, self).__init__(initial_parameters)
        self.derive_defaults('stoichiometry', 'reaction_ids', keys_list)

        # Mirror the configuration entries used below as attributes.
        config = self.parameters
        for key in (
                'monomer_ids',
                'complex_ids',
                'reaction_ids',
                'stoichiometry',
                'rates'):
            setattr(self, key, config[key])

        built = build_complexation_stoichiometry(
            self.stoichiometry,
            self.rates,
            self.reaction_ids,
            self.monomer_ids,
            self.complex_ids)
        self.complexation_stoichiometry, self.complexation_rates = built

        self.complexation = StochasticSystem(self.complexation_stoichiometry)
        self.mass_deriver_key = self.or_default(
            initial_parameters, 'mass_deriver_key')

    def ports_schema(self):
        '''Declare the ``monomers``, ``complexes`` and ``global`` ports.

        Every monomer and complex variable defaults to 0 and is emitted.
        A molecular weight is attached under ``_properties`` when the
        molecule has an entry in ``molecular_weight``.
        '''
        def molecule_schema(molecule_id):
            properties = {}
            if molecule_id in molecular_weight:
                properties = {'mw': molecular_weight[molecule_id]}
            return {
                '_default': 0,
                '_emit': True,
                '_properties': properties}

        return {
            'monomers': {
                monomer_id: molecule_schema(monomer_id)
                for monomer_id in self.monomer_ids},
            'complexes': {
                complex_id: molecule_schema(complex_id)
                for complex_id in self.complex_ids},
            'global': {}}

    def derivers(self):
        '''Request a ``mass`` deriver wired to the ``global`` port.'''
        mass_deriver = {
            'deriver': 'mass',
            'port_mapping': {'global': 'global'}}
        return {self.mass_deriver_key: mass_deriver}

    def next_update(self, timestep, states):
        '''Run the stochastic system for ``timestep`` and return the deltas.

        Arguments:
            timestep: Duration passed to ``StochasticSystem.evolve``.
            states: Port states; ``states['monomers']`` and
                ``states['complexes']`` are indexed by the configured ids.

        Returns:
            A dict with ``monomers`` and ``complexes`` entries mapping each
            id to the change in its count over the timestep.
        '''
        monomer_counts = states['monomers']
        complex_counts = states['complexes']
        n_monomers = len(self.monomer_ids)

        # State vector layout: all monomers first, then all complexes.
        substrate = np.zeros(
            n_monomers + len(self.complex_ids), dtype=np.int64)
        for slot, monomer_id in enumerate(self.monomer_ids):
            substrate[slot] = monomer_counts[monomer_id]
        for slot, complex_id in enumerate(self.complex_ids, n_monomers):
            substrate[slot] = complex_counts[complex_id]

        result = self.complexation.evolve(
            timestep, substrate, self.complexation_rates)

        # Updates are reported as differences from the starting counts.
        delta = result['outcome'] - substrate

        return {
            'monomers': {
                monomer_id: delta[slot]
                for slot, monomer_id in enumerate(self.monomer_ids)},
            'complexes': {
                complex_id: delta[slot]
                for slot, complex_id in enumerate(
                    self.complex_ids, n_monomers)}}
class Transcription(Process):
    # Stochastic transcription process: RNAP binding to promoters is
    # simulated with a StochasticSystem, and elongation of bound RNAPs is
    # delegated to an Elongation object created on every update.

    name = 'transcription'
    # NOTE(review): 'monomer_ids' and 'molecule_ids' below refer to the same
    # module-level list object — see the note on extend() in __init__.
    defaults = {
        'promoter_affinities': {},
        'transcription_factors': [],
        'sequence': '',
        'templates': {},
        'genes': {},
        'elongation_rate': 1.0,
        'polymerase_occlusion': 5,
        'symbol_to_monomer': nucleotides,
        'monomer_ids': monomer_ids,
        'concentrations_deriver_key': 'transcription_concentrations',
        'initial_domains': {
            0: {
                'id': 0,
                'lead': 0,
                'lag': 0,
                'children': []
            }
        },
        'molecule_ids': monomer_ids,
        'time_step': 1.0,
    }

    def __init__(self, initial_parameters=None):
        '''A stochastic transcription model

        .. WARNING:: Vivarium's knowledge base uses the gene name to
            name the protein. This means that for a gene acrA that
            codes for protein AcrA, you must refer to the gene,
            transcript, and protein each as acrA.

        :term:`Ports`:

        * **chromosome**: The linked :term:`store` should hold a
          representation of the chromosome in the form returned by
          :py:meth:`vivarium.states.chromosome.Chromosome.to_dict`.
        * **molecules**: Expects variables with the names in the
          *molecule_ids* configuration. These are the monomers consumed
          by transcription.
        * **factors**: Expects variables for each transcription factor's
          concentration.
        * **transcripts**: The linked store should store the
          concentrations of the transcripts.
        * **proteins**: The linked store should hold the concentrations
          of the transcription factors and the RNA polymerase.

        Arguments:
            initial_parameters: The following configuration options may
                be provided:

                * **promoter_affinities** (:py:class:`dict`): Maps from
                  binding state tuples to the binding affinity of RNA
                  polymerase and the promoter when the promoter is at
                  that binding state. The binding state of a promoter is
                  which (if any) transcription factors are bound to the
                  promoter. Such a binding state can be represented by a
                  binding state tuple, which is a :py:class:`tuple`
                  whose first element is the name of the promoter. All
                  bound transcription factors are listed as subsequent
                  elements. If no transcription factors are bound, the
                  sole subsequent element is ``None``.

                  .. todo:: What is the significance of the order in the
                      binding state tuple?

                  .. todo:: What are the units of the affinities?

                * **transcription_factors** (:py:class:`list`): A list
                  of all modeled transcription factors.
                * **sequence**: The DNA sequence that includes all the
                  genes whose transcription is being modeled.
                * **templates** (:py:class:`dict`): Maps from the name
                  of an operon to that operon's :term:`template
                  specification`.
                * **genes** (:py:class:`dict`): Maps from operon name to
                  a list of the names of the genes in that operon.
                * **elongation_rate** (:py:class:`float`): The
                  elongation rate of the RNA polymerase.
                * **polymerase_occlusion** (:py:class:`int`): The number
                  of base pairs behind the polymerase where another
                  polymerase is occluded and so cannot bind.
                * **symbol_to_monomer** (:py:class:`dict`): Maps from
                  the symbols used to represent monomers in the RNA
                  sequence to the name of the free monomer. This should
                  generally be
                  :py:data:`vivarium.data.nucleotides.nucleotides`.
                * **monomer_ids** (:py:class:`list`): A list of the
                  names of the free monomers consumed by transcription.
                  This can generally be computed as:

                  >>> from vivarium.data.nucleotides import nucleotides
                  >>> monomer_ids = nucleotides.values()
                  >>> print(list(monomer_ids))
                  ['ATP', 'GTP', 'UTP', 'CTP']

                  Note that we only included the ``list()``
                  transformation to make the output prettier. The
                  ``dict_values`` object returned by the ``.values()``
                  call is sufficiently list-like for use here.
                * **molecule_ids** (:py:class:`list`): A list of all
                  the molecules needed by the :term:`process`. This
                  will generally be the same as *monomer_ids*.

        Example configuring the process (uses
        :py:func:`vivarium.library.pretty.format_dict`):

        >>> import random
        >>>
        >>> import numpy as np
        >>>
        >>> from vivarium.states.chromosome import (
        ...     toy_chromosome_config,
        ...     Chromosome,
        ... )
        >>> from vivarium.data.nucleotides import nucleotides
        >>> # format_dict lets us print dictionaries prettily
        >>> from vivarium.library.pretty import format_dict
        >>>
        >>> random.seed(0)  # Needed because process is stochastic
        >>> np.random.seed(0)
        >>> # We will use the toy chromosome from toy_chromosome_config
        >>> print(toy_chromosome_config)
        {'sequence': 'ATACGGCACGTGACCGTCAACTTA', 'genes': {'oA': ['eA'], 'oAZ': ['eA', 'eZ'], 'oB': ['eB'], 'oBY': ['eB', 'eY']}, 'promoter_order': ['pA', 'pB'], 'promoters': {'pA': {'id': 'pA', 'position': 3, 'direction': 1, 'sites': [{'position': 0, 'length': 3, 'thresholds': {'tfA': <Quantity(0.3, 'millimolar')>}}], 'terminators': [{'position': 6, 'strength': 0.5, 'products': ['oA']}, {'position': 12, 'strength': 1.0, 'products': ['oAZ']}]}, 'pB': {'id': 'pB', 'position': -3, 'direction': -1, 'sites': [{'position': 0, 'length': 3, 'thresholds': {'tfB': <Quantity(0.5, 'millimolar')>}}], 'terminators': [{'position': -9, 'strength': 0.5, 'products': ['oB']}, {'position': -12, 'strength': 1.0, 'products': ['oBY']}]}}, 'promoter_affinities': {('pA', None): 1.0, ('pA', 'tfA'): 10.0, ('pB', None): 1.0, ('pB', 'tfB'): 10.0}, 'domains': {0: {'id': 0, 'lead': 0, 'lag': 0, 'children': []}}, 'rnaps': {}}
        >>> monomer_ids = list(nucleotides.values())
        >>> configuration = {
        ...     'promoter_affinities': {
        ...         ('pA', None): 1.0,
        ...         ('pA', 'tfA'): 10.0,
        ...         ('pB', None): 1.0,
        ...         ('pB', 'tfB'): 10.0},
        ...     'transcription_factors': ['tfA', 'tfB'],
        ...     'sequence': toy_chromosome_config['sequence'],
        ...     'templates': toy_chromosome_config['promoters'],
        ...     'genes': toy_chromosome_config['genes'],
        ...     'elongation_rate': 10.0,
        ...     'polymerase_occlusion': 5,
        ...     'symbol_to_monomer': nucleotides,
        ...     'monomer_ids': monomer_ids,
        ...     'molecule_ids': monomer_ids,
        ... }
        >>> # At this point we haven't used the toy chromosome yet
        >>> # because it will be specified in the chromosome port.
        >>> # Notice that the parameters are specific to the chromosome.
        >>> transcription_process = Transcription(configuration)
        >>> # Now we need to initialize the simulation stores
        >>> state = {
        ...     'chromosome': toy_chromosome_config,
        ...     'molecules': {
        ...         nucleotide: 10
        ...         for nucleotide in monomer_ids
        ...     },
        ...     'proteins': {UNBOUND_RNAP_KEY: 10},
        ...     'factors': {'tfA': 0.2 * units.mM, 'tfB': 0.7 * units.mM},
        ... }
        >>> update = transcription_process.next_update(1.0, state)
        >>> print(update['chromosome'])
        {'rnaps': {'_add': [{'path': (2,), 'state': <class 'vivarium.states.chromosome.Rnap'>: {'id': 2, 'template': 'pA', 'template_index': 0, 'terminator': 1, 'domain': 0, 'state': 'polymerizing', 'position': 7}}, {'path': (3,), 'state': <class 'vivarium.states.chromosome.Rnap'>: {'id': 3, 'template': 'pB', 'template_index': 1, 'terminator': 0, 'domain': 0, 'state': 'occluding', 'position': 3}}, {'path': (4,), 'state': <class 'vivarium.states.chromosome.Rnap'>: {'id': 4, 'template': 'pA', 'template_index': 0, 'terminator': 0, 'domain': 0, 'state': 'occluding', 'position': 0}}], '_delete': []}, 'rnap_id': 4, 'domains': {0: <class 'vivarium.states.chromosome.Domain'>: {'id': 0, 'lead': 0, 'lag': 0, 'children': []}}, 'root_domain': 0}
        '''

        if not initial_parameters:
            initial_parameters = {}

        log.debug(
            'inital transcription parameters: {}'.format(initial_parameters))

        super(Transcription, self).__init__(initial_parameters)

        # Fill in 'promoter_order' and 'transcript_ids' from 'templates'.
        # NOTE(review): presumably only when they were not supplied —
        # confirm against Process.derive_defaults.
        self.derive_defaults('templates', 'promoter_order', keys_list)
        self.derive_defaults('templates', 'transcript_ids', template_products)

        self.sequence = self.parameters['sequence']
        self.templates = self.parameters['templates']
        self.genes = self.parameters['genes']
        # A chromosome without RNAPs or domains, built only to obtain the
        # sequences that are later handed to Elongation.
        empty_chromosome = Chromosome({
            'sequence': self.sequence,
            'promoters': self.templates,
            'genes': self.genes
        })
        self.sequences = empty_chromosome.sequences()
        self.symbol_to_monomer = self.parameters['symbol_to_monomer']

        log.debug('chromosome sequence: {}'.format(self.sequence))

        self.promoter_affinities = self.parameters['promoter_affinities']
        self.promoter_order = self.parameters['promoter_order']
        self.promoter_count = len(self.promoter_order)

        self.transcription_factors = self.parameters['transcription_factors']
        self.molecule_ids = self.parameters['molecule_ids']
        # NOTE(review): extend() mutates the list stored in self.parameters
        # in place. If 'molecule_ids' and 'monomer_ids' are the same list
        # object (as in `defaults` and in the docstring example), or the
        # list is owned by the caller, those also gain 'ATP' and 'ADP' —
        # confirm this aliasing is intended.
        self.molecule_ids.extend(['ATP', 'ADP'])
        self.monomer_ids = self.parameters['monomer_ids']
        self.transcript_ids = self.parameters['transcript_ids']
        # Fractional elongation carried over between calls to next_update.
        self.elongation = 0
        self.elongation_rate = self.parameters['elongation_rate']
        self.polymerase_occlusion = self.parameters['polymerase_occlusion']

        self.stoichiometry = build_stoichiometry(self.promoter_count)
        # The seed is drawn from numpy's global random state, so seeding
        # numpy beforehand makes the initiation system reproducible.
        self.initiation = StochasticSystem(self.stoichiometry,
                                           random_seed=np.random.randint(
                                               2**31))

        self.protein_ids = [UNBOUND_RNAP_KEY] + self.transcription_factors

        self.initial_domains = self.parameters['initial_domains']
        self.concentrations_deriver_key = self.parameters[
            'concentrations_deriver_key']

        # Keys of Chromosome.to_dict() that are reported in the update.
        self.chromosome_ports = ['rnaps', 'rnap_id', 'domains', 'root_domain']

        log.debug('final transcription parameters: {}'.format(self.parameters))

    def build_affinity_vector(self, promoters, factors):
        '''Return the RNAP binding affinity of every promoter.

        Arguments:
            promoters: Mapping from promoter key to a promoter object
                providing ``binding_state(factors)``.
            factors: Transcription factor values passed through to
                ``binding_state``.

        Returns:
            A float64 array ordered like ``self.promoter_order``. Binding
            states missing from ``self.promoter_affinities`` get 0.0.
        '''
        vector = np.zeros(len(self.promoter_order), dtype=np.float64)
        for index, promoter_key in enumerate(self.promoter_order):
            promoter = promoters[promoter_key]
            binding = promoter.binding_state(factors)
            affinity = self.promoter_affinities.get(binding, 0.0)
            # print('promoter state - {}: {}'.format(binding, affinity))
            vector[index] = affinity
        return vector

    def chromosome_config(self, chromosome_states):
        '''Combine the chromosome store state with the static configuration.

        The keyword arguments override any entries of the same name in
        ``chromosome_states``.
        '''
        return dict(chromosome_states,
                    sequence=self.sequence,
                    promoters=self.templates,
                    promoter_order=self.promoter_order,
                    genes=self.genes)

    def ports_schema(self):
        '''Declare the schema of every port used by the process.'''
        schema = {}

        # '*' entries give the schema applied to every domain / rnap.
        schema['chromosome'] = {
            'rnap_id': {
                '_default': 1,
                '_updater': 'set'
            },
            'root_domain': {
                '_default': 0,
                '_updater': 'set'
            },
            'domains': {
                '*': {
                    'id': {
                        '_default': 1,
                        '_updater': 'set'
                    },
                    'lead': {
                        '_default': 0,
                        '_updater': 'set'
                    },
                    'lag': {
                        '_default': 0,
                        '_updater': 'set'
                    },
                    'children': {
                        '_default': [],
                        '_updater': 'set'
                    }
                }
            },
            'rnaps': {
                '*': {
                    'id': {
                        '_default': -1,
                        '_updater': 'set'
                    },
                    'domain': {
                        '_default': 0,
                        '_updater': 'set'
                    },
                    'state': {
                        '_default': None,
                        '_updater': 'set',
                        '_emit': True
                    },
                    'position': {
                        '_default': 0,
                        '_updater': 'set',
                        '_emit': True
                    },
                    'template': {
                        '_default': None,
                        '_updater': 'set',
                        '_emit': True
                    },
                    'template_index': {
                        '_default': 0,
                        '_updater': 'set',
                        '_emit': True
                    },
                    'terminator': {
                        '_default': 0,
                        '_updater': 'set',
                        '_emit': True
                    }
                }
            }
        }

        # One explicit entry per configured initial domain. Only the key is
        # used: 'lead', 'lag' and 'children' get fixed defaults rather than
        # the configured values.
        initial_domains = {
            id: {
                'id': {
                    '_default': id,
                    '_updater': 'set'
                },
                'lead': {
                    '_default': 0,
                    '_updater': 'set'
                },
                'lag': {
                    '_default': 0,
                    '_updater': 'set'
                },
                'children': {
                    '_default': [],
                    '_updater': 'set'
                }
            }
            for id, domain in self.initial_domains.items()
        }

        schema['chromosome']['domains'].update(initial_domains)

        schema['molecules'] = {
            molecule: {
                '_default': 0,
                '_divider': 'split',
                '_emit': True
            }
            for molecule in self.molecule_ids
        }

        schema['factors'] = {
            factor: {
                '_default': 0.0,
                '_divider': 'split'
            }
            for factor in self.transcription_factors
        }

        schema['transcripts'] = {
            protein: {
                '_default': 0,
                '_divider': 'split',
                '_emit': True
            }
            for protein in self.transcript_ids
        }

        schema['proteins'] = {
            protein: {
                '_default': 0,
                '_divider': 'split',
                '_emit': True
            }
            for protein in self.protein_ids
        }

        schema['global'] = {}

        return schema

    def derivers(self):
        '''Request a concentrations deriver for the transcription factors.

        The deriver maps its ``counts`` port to ``proteins`` and its
        ``concentrations`` port to ``factors``.
        '''
        return {
            self.concentrations_deriver_key: {
                'deriver': 'concentrations_deriver',
                'port_mapping': {
                    'global': 'global',
                    'counts': 'proteins',
                    'concentrations': 'factors'
                },
                'config': {
                    'concentration_keys': self.transcription_factors
                }
            }
        }

    def next_update(self, timestep, states):
        '''Simulate RNAP initiation and elongation for ``timestep``.

        Arguments:
            timestep: Length of simulated time to advance.
            states: Port states; the ``chromosome``, ``molecules``,
                ``proteins`` and ``factors`` entries are read.

        Returns:
            An update dict with ``chromosome``, ``proteins``,
            ``molecules`` and ``transcripts`` entries.
        '''
        chromosome_state = states['chromosome']
        # chromosome_state['rnaps'] = list(chromosome_state['rnaps'].values())
        # Remember which RNAPs existed before this update so that added,
        # removed and continuing RNAPs can be told apart at the end.
        original_rnap_keys = [
            rnap['id'] for rnap in chromosome_state['rnaps'].values()
        ]
        chromosome = Chromosome(self.chromosome_config(chromosome_state))
        molecules = states['molecules']
        proteins = states['proteins']
        factors = states['factors']  # as concentrations

        promoter_rnaps = chromosome.promoter_rnaps()
        promoter_domains = chromosome.promoter_domains()

        # Find out how many promoters are currently blocked by a
        # newly initiated or occluding rnap
        promoter_count = len(chromosome.promoter_order)
        blocked_promoters = np.zeros(promoter_count, dtype=np.int64)
        open_domains = {}
        bound_domains = {}
        for promoter_index, promoter_key in enumerate(
                chromosome.promoter_order):
            domains = []
            for rnap in promoter_rnaps.get(promoter_key, {}).values():
                if rnap.is_occluding():
                    domains.append(rnap.domain)
                    blocked_promoters[promoter_index] += 1

            bound_domains[promoter_key] = set(domains)
            # Domains of this promoter not occupied by an occluding RNAP.
            open_domains[promoter_key] = promoter_domains[
                promoter_key] - bound_domains[promoter_key]

        blocked_promoters = np.array(blocked_promoters)

        # Make the state for a gillespie simulation out of total number of each
        # promoter by copy number not blocked by initiated rnap,
        # concatenated with the number of each promoter that is bound by rnap.
        # These are the two states for each promoter the simulation
        # will operate on, essentially going back and forth between
        # bound and unbound states.
        copy_numbers = chromosome.promoter_copy_numbers()
        original_unbound_rnaps = proteins[UNBOUND_RNAP_KEY]
        monomer_limits = {
            monomer: molecules[monomer]
            for monomer in self.monomer_ids
        }
        unbound_rnaps = original_unbound_rnaps

        time = 0
        now = 0
        # self.elongation carries the fractional progress left over from
        # the previous call (see the assignment after the loop).
        elongation = Elongation(self.sequences, chromosome.promoters,
                                monomer_limits, self.symbol_to_monomer,
                                self.elongation)

        initiation_affinity = self.build_affinity_vector(
            chromosome.promoters, factors)

        while time < timestep:
            # build the state vector for the gillespie simulation
            substrate = np.concatenate([
                copy_numbers - blocked_promoters, blocked_promoters,
                [unbound_rnaps]
            ])

            log.debug('transcription substrate: {}'.format(substrate))
            log.debug('blocked promoters: {}'.format(blocked_promoters))

            # find number of monomers until next terminator
            distance = 1 / self.elongation_rate  # chromosome.terminator_distance()

            # find interval of time that elongates to the point of the next terminator
            interval = min(distance, timestep - time)

            # A full step is taken only when a whole `distance` fits in the
            # remaining time; otherwise the partial interval is stored.
            if interval == distance:
                # perform the elongation until the next event
                terminations, monomer_limits, chromosome.rnaps = elongation.step(
                    interval, monomer_limits, chromosome.rnaps)
                unbound_rnaps += terminations
            else:
                elongation.store_partial(interval)
                terminations = 0

            log.debug('time: {} --- interval: {}'.format(time, interval))
            log.debug('monomer limits: {}'.format(monomer_limits))
            log.debug('terminations: {}'.format(terminations))

            # run simulation for interval of time to next terminator
            result = self.initiation.evolve(interval, substrate,
                                            initiation_affinity)

            log.debug('result: {}'.format(result))

            # perform binding
            for now, event in zip(result['time'], result['events']):
                # RNAP has bound the promoter
                promoter_key = chromosome.promoter_order[event]
                promoter = chromosome.promoters[promoter_key]
                domains = open_domains[promoter_key]
                domain = choose_element(domains)

                blocked_promoters[event] += 1
                bound_domains[promoter_key].add(domain)
                open_domains[promoter_key].remove(domain)

                # create a new bound RNAP and add it to the chromosome.
                new_rnap = chromosome.bind_rnap(event, domain)
                new_rnap.start_polymerizing()

                log.debug('newly bound RNAP: {}'.format(new_rnap))

                unbound_rnaps -= 1

            # deal with occluding rnap
            for rnap in chromosome.rnaps.values():
                if rnap.is_unoccluding(self.polymerase_occlusion):
                    log.debug('RNAP unoccluding: {}'.format(rnap))

                    # The promoter and its domain become available again.
                    blocked_promoters[rnap.template_index] -= 1
                    bound_domains[rnap.template].remove(rnap.domain)
                    open_domains[rnap.template].add(rnap.domain)
                    rnap.unocclude()
                log.debug('rnap: {}'.format(rnap))

            log.debug('complete: {}'.format(elongation.complete_polymers))

            time += interval

        # track how far elongation proceeded to start from next iteration
        self.elongation = elongation.elongation - int(elongation.elongation)

        # Updates are reported as changes relative to the incoming state.
        proteins = {UNBOUND_RNAP_KEY: unbound_rnaps - original_unbound_rnaps}

        molecules = {
            key: count * -1
            for key, count in elongation.monomers.items()
        }

        # 1 ATP hydrolysis cost per nucleotide elongation
        # NOTE(review): if 'ATP' is a key of elongation.monomers, its entry
        # computed above is overwritten here — confirm this is intended.
        molecules['ATP'] = 0
        molecules['ADP'] = 0

        for count in elongation.monomers.values():
            molecules['ATP'] -= count
            molecules['ADP'] += count

        chromosome_dict = chromosome.to_dict()
        rnaps = chromosome_dict['rnaps']

        # Classify RNAPs by comparing the ids before and after the update.
        original = set(original_rnap_keys)
        current = set(rnaps.keys())
        bound_rnaps = current - original
        completed_rnaps = original - current
        continuing_rnaps = original - completed_rnaps

        rnap_updates = {
            rnap_id: rnaps[rnap_id]
            for rnap_id in continuing_rnaps
        }

        add_rnaps = [{
            'path': (bound, ),
            'state': rnaps[bound]
        } for bound in bound_rnaps]

        delete_rnaps = [(completed, ) for completed in completed_rnaps]

        rnap_updates['_add'] = add_rnaps
        rnap_updates['_delete'] = delete_rnaps
        chromosome_dict['rnaps'] = rnap_updates

        update = {
            'chromosome':
            {key: chromosome_dict[key]
             for key in self.chromosome_ports},
            'proteins': proteins,
            'molecules': molecules,
            'transcripts': elongation.complete_polymers
        }

        log.debug('molecules update: {}'.format(update['molecules']))

        return update
def __init__(self, initial_parameters=None):
    '''A stochastic transcription model

    .. WARNING:: Vivarium's knowledge base uses the gene name to
        name the protein. This means that for a gene acrA that
        codes for protein AcrA, you must refer to the gene,
        transcript, and protein each as acrA.

    :term:`Ports`:

    * **chromosome**: The linked :term:`store` should hold a
      representation of the chromosome in the form returned by
      :py:meth:`vivarium.states.chromosome.Chromosome.to_dict`.
    * **molecules**: Expects variables with the names in the
      *molecule_ids* configuration. These are the monomers consumed
      by transcription.
    * **factors**: Expects variables for each transcription factor's
      concentration.
    * **transcripts**: The linked store should store the
      concentrations of the transcripts.
    * **proteins**: The linked store should hold the concentrations
      of the transcription factors and the RNA polymerase.

    Arguments:
        initial_parameters: The following configuration options may
            be provided:

            * **promoter_affinities** (:py:class:`dict`): Maps from
              binding state tuples to the binding affinity of RNA
              polymerase and the promoter when the promoter is at
              that binding state. The binding state of a promoter is
              which (if any) transcription factors are bound to the
              promoter. Such a binding state can be represented by a
              binding state tuple, which is a :py:class:`tuple`
              whose first element is the name of the promoter. All
              bound transcription factors are listed as subsequent
              elements. If no transcription factors are bound, the
              sole subsequent element is ``None``.

              .. todo:: What is the significance of the order in the
                  binding state tuple?

              .. todo:: What are the units of the affinities?

            * **transcription_factors** (:py:class:`list`): A list
              of all modeled transcription factors.
            * **sequence**: The DNA sequence that includes all the
              genes whose transcription is being modeled.
            * **templates** (:py:class:`dict`): Maps from the name
              of an operon to that operon's :term:`template
              specification`.
            * **genes** (:py:class:`dict`): Maps from operon name to
              a list of the names of the genes in that operon.
            * **elongation_rate** (:py:class:`float`): The
              elongation rate of the RNA polymerase.
            * **polymerase_occlusion** (:py:class:`int`): The number
              of base pairs behind the polymerase where another
              polymerase is occluded and so cannot bind.
            * **symbol_to_monomer** (:py:class:`dict`): Maps from
              the symbols used to represent monomers in the RNA
              sequence to the name of the free monomer. This should
              generally be
              :py:data:`vivarium.data.nucleotides.nucleotides`.
            * **monomer_ids** (:py:class:`list`): A list of the
              names of the free monomers consumed by transcription.
              This can generally be computed as:

              >>> from vivarium.data.nucleotides import nucleotides
              >>> monomer_ids = nucleotides.values()
              >>> print(list(monomer_ids))
              ['ATP', 'GTP', 'UTP', 'CTP']

              Note that we only included the ``list()``
              transformation to make the output prettier. The
              ``dict_values`` object returned by the ``.values()``
              call is sufficiently list-like for use here.
            * **molecule_ids** (:py:class:`list`): A list of all
              the molecules needed by the :term:`process`. This
              will generally be the same as *monomer_ids*.

    Example configuring the process (uses
    :py:func:`vivarium.library.pretty.format_dict`):

    >>> import random
    >>>
    >>> import numpy as np
    >>>
    >>> from vivarium.states.chromosome import (
    ...     toy_chromosome_config,
    ...     Chromosome,
    ... )
    >>> from vivarium.data.nucleotides import nucleotides
    >>> # format_dict lets us print dictionaries prettily
    >>> from vivarium.library.pretty import format_dict
    >>>
    >>> random.seed(0)  # Needed because process is stochastic
    >>> np.random.seed(0)
    >>> # We will use the toy chromosome from toy_chromosome_config
    >>> print(toy_chromosome_config)
    {'sequence': 'ATACGGCACGTGACCGTCAACTTA', 'genes': {'oA': ['eA'], 'oAZ': ['eA', 'eZ'], 'oB': ['eB'], 'oBY': ['eB', 'eY']}, 'promoter_order': ['pA', 'pB'], 'promoters': {'pA': {'id': 'pA', 'position': 3, 'direction': 1, 'sites': [{'position': 0, 'length': 3, 'thresholds': {'tfA': <Quantity(0.3, 'millimolar')>}}], 'terminators': [{'position': 6, 'strength': 0.5, 'products': ['oA']}, {'position': 12, 'strength': 1.0, 'products': ['oAZ']}]}, 'pB': {'id': 'pB', 'position': -3, 'direction': -1, 'sites': [{'position': 0, 'length': 3, 'thresholds': {'tfB': <Quantity(0.5, 'millimolar')>}}], 'terminators': [{'position': -9, 'strength': 0.5, 'products': ['oB']}, {'position': -12, 'strength': 1.0, 'products': ['oBY']}]}}, 'promoter_affinities': {('pA', None): 1.0, ('pA', 'tfA'): 10.0, ('pB', None): 1.0, ('pB', 'tfB'): 10.0}, 'domains': {0: {'id': 0, 'lead': 0, 'lag': 0, 'children': []}}, 'rnaps': {}}
    >>> monomer_ids = list(nucleotides.values())
    >>> configuration = {
    ...     'promoter_affinities': {
    ...         ('pA', None): 1.0,
    ...         ('pA', 'tfA'): 10.0,
    ...         ('pB', None): 1.0,
    ...         ('pB', 'tfB'): 10.0},
    ...     'transcription_factors': ['tfA', 'tfB'],
    ...     'sequence': toy_chromosome_config['sequence'],
    ...     'templates': toy_chromosome_config['promoters'],
    ...     'genes': toy_chromosome_config['genes'],
    ...     'elongation_rate': 10.0,
    ...     'polymerase_occlusion': 5,
    ...     'symbol_to_monomer': nucleotides,
    ...     'monomer_ids': monomer_ids,
    ...     'molecule_ids': monomer_ids,
    ... }
    >>> # At this point we haven't used the toy chromosome yet
    >>> # because it will be specified in the chromosome port.
    >>> # Notice that the parameters are specific to the chromosome.
    >>> transcription_process = Transcription(configuration)
    >>> # Now we need to initialize the simulation stores
    >>> state = {
    ...     'chromosome': toy_chromosome_config,
    ...     'molecules': {
    ...         nucleotide: 10
    ...         for nucleotide in monomer_ids
    ...     },
    ...     'proteins': {UNBOUND_RNAP_KEY: 10},
    ...     'factors': {'tfA': 0.2 * units.mM, 'tfB': 0.7 * units.mM},
    ... }
    >>> update = transcription_process.next_update(1.0, state)
    >>> print(update['chromosome'])
    {'rnaps': {'_add': [{'path': (2,), 'state': <class 'vivarium.states.chromosome.Rnap'>: {'id': 2, 'template': 'pA', 'template_index': 0, 'terminator': 1, 'domain': 0, 'state': 'polymerizing', 'position': 7}}, {'path': (3,), 'state': <class 'vivarium.states.chromosome.Rnap'>: {'id': 3, 'template': 'pB', 'template_index': 1, 'terminator': 0, 'domain': 0, 'state': 'occluding', 'position': 3}}, {'path': (4,), 'state': <class 'vivarium.states.chromosome.Rnap'>: {'id': 4, 'template': 'pA', 'template_index': 0, 'terminator': 0, 'domain': 0, 'state': 'occluding', 'position': 0}}], '_delete': []}, 'rnap_id': 4, 'domains': {0: <class 'vivarium.states.chromosome.Domain'>: {'id': 0, 'lead': 0, 'lag': 0, 'children': []}}, 'root_domain': 0}
    '''
    initial_parameters = initial_parameters or {}

    log.debug(
        'inital transcription parameters: {}'.format(initial_parameters))

    super(Transcription, self).__init__(initial_parameters)

    # Both derived entries are computed from the 'templates' parameter.
    for derived_key, derive in (
            ('promoter_order', keys_list),
            ('transcript_ids', template_products)):
        self.derive_defaults('templates', derived_key, derive)

    # Local alias for the merged parameters; taken after derive_defaults
    # has run so the derived entries are present.
    config = self.parameters

    self.sequence = config['sequence']
    self.templates = config['templates']
    self.genes = config['genes']

    # A chromosome with no RNAPs or domains, used only for its sequences.
    chromosome_spec = {
        'sequence': self.sequence,
        'promoters': self.templates,
        'genes': self.genes}
    self.sequences = Chromosome(chromosome_spec).sequences()
    self.symbol_to_monomer = config['symbol_to_monomer']

    log.debug('chromosome sequence: {}'.format(self.sequence))

    self.promoter_affinities = config['promoter_affinities']
    self.promoter_order = config['promoter_order']
    self.promoter_count = len(self.promoter_order)

    self.transcription_factors = config['transcription_factors']

    # The configured list is extended in place, exactly as before.
    self.molecule_ids = config['molecule_ids']
    self.molecule_ids.extend(['ATP', 'ADP'])

    self.monomer_ids = config['monomer_ids']
    self.transcript_ids = config['transcript_ids']
    self.elongation = 0
    self.elongation_rate = config['elongation_rate']
    self.polymerase_occlusion = config['polymerase_occlusion']

    self.stoichiometry = build_stoichiometry(self.promoter_count)
    # Seed drawn from numpy's global random state.
    initiation_seed = np.random.randint(2**31)
    self.initiation = StochasticSystem(
        self.stoichiometry, random_seed=initiation_seed)

    self.protein_ids = [UNBOUND_RNAP_KEY] + self.transcription_factors

    self.initial_domains = config['initial_domains']
    self.concentrations_deriver_key = config['concentrations_deriver_key']

    self.chromosome_ports = ['rnaps', 'rnap_id', 'domains', 'root_domain']

    log.debug('final transcription parameters: {}'.format(self.parameters))
import json

from arrow import StochasticSystem
import numpy as np

# Duration handed to every evolve() call below.
duration = 2**31

with open('data/complexation/large-initial.json') as f:
    data = json.load(f)

stoich = np.array(data['stoich'])
# The stored rates are scaled down by a constant factor before use.
rates = 1e-30 * np.array(data['rates'])
counts = np.array(data['counts'])

# The stoichiometry from the file is transposed before being passed in.
system = StochasticSystem(stoich.T, random_seed=0)

# Keep evolving from the latest counts until a run leaves them unchanged.
while True:
    result = system.evolve(duration, counts, rates)
    updated_counts = result['outcome']

    if np.any(counts - updated_counts):
        # Something changed: sanity-check the new state, then continue
        # from it.
        if np.any(updated_counts < 0):
            raise Exception('Negative counts')
        counts = updated_counts
    else:
        break

print(counts)