def test_getItem_indicies(self):
    """Check fancy indexing of a UnitStructArray.

    Both an integer index list and a boolean mask must return a
    UnitStructArray equal to one built from the same selection of the
    underlying struct array, with the units dict unchanged.
    """
    for selector in ([0, 2], [True, False, True]):
        selected = self.us_array[selector]
        expected = UnitStructArray(self.struct_array[selector], self.units)
        self.assertEqual(selected, expected)
def _buildAllMasses(self, raw_data, sim_data):
    """Collect (id, total mass) rows for every molecule class into one table.

    Each entry's molecular-weight field is collapsed with ``np.sum`` and the
    resulting rows are stored in ``self._allMass`` as a UnitStructArray whose
    'mass' column is tagged g/mol. ``sim_data`` is not used here.
    """
    # (collection, name of its molecular-weight field), in storage order.
    sources = (
        (raw_data.rnas, 'mw'),
        (raw_data.proteins, 'mw'),
        (raw_data.proteinComplexes, 'mw'),
        (raw_data.metabolites, 'mw7.2'),
        (raw_data.modifiedForms, 'mw7.2'),
        (raw_data.polymerized, 'mw'),
        (raw_data.water, 'mw7.2'),
        (raw_data.chromosome, 'mw'),
    )

    size = sum(len(collection) for collection, _ in sources)
    allMass = np.empty(size, dtype=[('id', 'a50'), ('mass', "f8")])

    rows = []
    for collection, weight_key in sources:
        for entry in collection:
            rows.append((entry['id'], np.sum(entry[weight_key])))
    allMass[:] = rows

    field_units = {
        'id': None,
        'mass': units.g / units.mol,
    }

    self._allMass = UnitStructArray(allMass, field_units)
    # TODO: change to dict?
def test_init(self):
    """Check the three constructor validation errors and their messages.

    Covers a payload that is not a numpy array, a units argument that is
    not a dict, and a units dict holding a key the struct array lacks.
    """
    not_array_msg = 'UnitStructArray must be initialized with a numpy array!\n'
    not_dict_msg = (
        'UnitStructArray must be initialized with a dict storing units!\n')
    mismatch_msg = 'Struct array fields do not match unit fields!\n'

    with self.assertRaises(Exception) as bad_array:
        UnitStructArray(1., {'hello': 'goodbye'})
    self.assertEqual(bad_array.exception.message, not_array_msg)

    with self.assertRaises(Exception) as bad_units:
        UnitStructArray(self.struct_array, 'foo')
    self.assertEqual(bad_units.exception.message, not_dict_msg)

    with self.assertRaises(Exception) as bad_fields:
        # Extra key with no matching struct-array field.
        self.units['hi'] = 'bye'
        UnitStructArray(self.struct_array, self.units)
    self.assertEqual(bad_fields.exception.message, mismatch_msg)
def __init__(self, raw_data, sim_data):
    """Start with an empty environment table that has a single 'id' column.

    Neither ``raw_data`` nor ``sim_data`` is read here.
    """
    id_only_dtype = [("id", "a50")]
    empty_table = np.zeros(0, dtype=id_only_dtype)

    # Add units to values; 'id' is a label column and carries none.
    column_units = {"id": None}

    self.environmentData = UnitStructArray(empty_table, column_units)
def __init__(self, raw_data, sim_data):
    """Start with an empty bulk-molecule table of ids and mass vectors.

    Each row's 'mass' entry holds one float per element of
    ``sim_data.molecular_weight_order`` and is tagged g/mol.
    ``raw_data`` is not read here.
    """
    mass_width = len(sim_data.molecular_weight_order)
    row_dtype = [
        ("id", "a50"),
        ("mass", "{}f8".format(mass_width)),
    ]
    empty_table = np.zeros(0, dtype=row_dtype)

    # Add units to values
    column_units = {
        "id": None,
        "mass": units.g / units.mol,
    }

    self.bulkData = UnitStructArray(empty_table, column_units)
def __init__(self, raw_data, sim_data):
    """Start with no unique-molecule definitions and an empty mass table.

    Each mass row holds one float per element of
    ``sim_data.molecular_weight_order`` and is tagged g/mol.
    ``raw_data`` is not read here.
    """
    self.uniqueMoleculeDefinitions = collections.OrderedDict()

    mass_width = len(sim_data.molecular_weight_order)
    row_dtype = [
        ("id", "a50"),
        ("mass", "{}f8".format(mass_width)),
    ]
    empty_masses = np.zeros(0, dtype=row_dtype)

    column_units = {
        "id": None,
        "mass": units.g / units.mol,
    }

    self.uniqueMoleculeMasses = UnitStructArray(empty_masses, column_units)
def addToStateCommon(bulkState, ids, masses):
    """Return a new UnitStructArray: ``bulkState`` plus one row per id.

    ``bulkState`` itself is not modified. The appended rows take their ids
    from ``ids`` and their mass vectors from ``masses.asNumber()``, and the
    result reuses ``bulkState.units``.
    """
    # TODO: Make this better
    mass_width = masses.asNumber().shape[1]
    row_dtype = [
        ("id", "a50"),
        ("mass", "{}f8".format(mass_width)),
    ]
    appended_rows = np.zeros(len(ids), dtype=row_dtype)

    # NOTE(review): the return value is discarded, so this presumably acts
    # only as a unit-compatibility check on `masses` -- confirm.
    bulkState.units['mass'].matchUnits(masses)

    appended_rows["id"] = ids
    appended_rows["mass"] = masses.asNumber()

    combined = np.hstack((bulkState.fullArray(), appended_rows))
    return UnitStructArray(combined, bulkState.units)
def _buildRnaData(self, raw_data, sim_data):
    """Build the per-RNA property table and the basal expression profiles.

    Sets three attributes:

    * ``self.rnaData`` -- a UnitStructArray with one row per entry of
      ``raw_data.rnas`` (id, degradation rate, length, ACGU counts, mass,
      type flags, sequence, gene id and an endoRNase Km).
    * ``self.rnaExpression["basal"]`` -- RNA-seq expression normalised to
      sum to 1.
    * ``self.rnaSynthProb["basal"]`` -- normalised synthesis probability,
      computed as expression * (ln2 / doubling time + degradation rate).

    Raises:
        AssertionError: if any RNA does not have exactly one location.
        Exception: if an mRNA/miscRNA has no RNA-seq row, or an RNA has a
            type other than mRNA, miscRNA, rRNA or tRNA.
    """
    assert all([len(rna['location']) == 1 for rna in raw_data.rnas])

    rnaIds = [
        '{}[{}]'.format(rna['id'], rna['location'][0])
        for rna in raw_data.rnas
        if len(rna['location']) == 1
    ]

    rnaDegRates = np.log(2) / np.array(
        [rna['halfLife'] for rna in raw_data.rnas])  # TODO: units

    rnaLens = np.array([len(rna['seq']) for rna in raw_data.rnas])

    ntCounts = np.array([
        (rna['seq'].count('A'), rna['seq'].count('C'),
         rna['seq'].count('G'), rna['seq'].count('U'))
        for rna in raw_data.rnas
    ])

    # Load expression from RNA-seq data.
    # The table is looked up once with getattr (no eval of a built string)
    # and indexed by gene so each RNA is matched in O(1). setdefault keeps
    # the FIRST row per gene, matching the former "first match wins" scan.
    rnaSeqRows = getattr(
        raw_data.rna_seq_data,
        "rnaseq_{}_mean".format(RNA_SEQ_ANALYSIS))
    firstRowByGene = {}
    for row in rnaSeqRows:
        firstRowByGene.setdefault(row['Gene'], row)

    condition = sim_data.basal_expression_condition
    expression = []
    for rna in raw_data.rnas:
        row = firstRowByGene.get(rna['geneId'])
        if row is not None:
            expression.append(row[condition])
        elif rna['type'] in ('mRNA', 'miscRNA'):
            raise Exception('No RNA-seq data found for {}'.format(
                rna['id']))
        elif rna['type'] in ('rRNA', 'tRNA'):
            # rRNA/tRNA without RNA-seq data get zero expression.
            expression.append(0.)
        else:
            raise Exception('Unknown RNA {}'.format(rna['id']))

    expression = np.array(expression)
    synthProb = expression * (
        np.log(2) / sim_data.doubling_time.asNumber(units.s) + rnaDegRates)
    synthProb /= synthProb.sum()

    # Hard-coded endoRNase parameters used to derive a per-RNA Km.
    KcatEndoRNase = 0.001
    EstimateEndoRNases = 5000
    Km = (KcatEndoRNase * EstimateEndoRNases / rnaDegRates) - expression

    mws = np.array([rna['mw'] for rna in raw_data.rnas]).sum(axis=1)

    geneIds = np.array([rna['geneId'] for rna in raw_data.rnas])

    size = len(rnaIds)

    # Builtin bool replaces the np.bool alias (deprecated, later removed).
    is23S = np.zeros(size, dtype=bool)
    is16S = np.zeros(size, dtype=bool)
    is5S = np.zeros(size, dtype=bool)

    # rRNA subtypes are recognised by id prefix.
    for rnaIndex, rna in enumerate(raw_data.rnas):
        if rna["type"] == "rRNA" and rna["id"].startswith("RRL"):
            is23S[rnaIndex] = True

        if rna["type"] == "rRNA" and rna["id"].startswith("RRS"):
            is16S[rnaIndex] = True

        if rna["type"] == "rRNA" and rna["id"].startswith("RRF"):
            is5S[rnaIndex] = True

    sequences = [rna['seq'] for rna in raw_data.rnas]

    maxSequenceLength = max(len(sequence) for sequence in sequences)

    monomerIds = [x['monomerId'] for x in raw_data.rnas]

    # TODO: Add units
    rnaData = np.zeros(
        size,
        dtype=[
            ('id', 'a50'),
            # ('synthProb', 'f8'),
            # ('expression', 'float64'),
            ('degRate', 'f8'),
            ('length', 'i8'),
            ('countsACGU', '4i8'),
            ('mw', 'f8'),
            ('isMRna', 'bool'),
            ('isMiscRna', 'bool'),
            ('isRRna', 'bool'),
            ('isTRna', 'bool'),
            ('isRRna23S', 'bool'),
            ('isRRna16S', 'bool'),
            ('isRRna5S', 'bool'),
            ('isRProtein', 'bool'),
            ('isRnap', 'bool'),
            ('sequence', 'a{}'.format(maxSequenceLength)),
            ('geneId', 'a50'),
            ('KmEndoRNase', 'f8'),
        ])

    rnaData['id'] = rnaIds
    # rnaData["synthProb"] = synthProb
    # rnaData["expression"] = expression
    rnaData['degRate'] = rnaDegRates
    rnaData['length'] = rnaLens
    rnaData['countsACGU'] = ntCounts
    rnaData['mw'] = mws
    rnaData['isMRna'] = [rna["type"] == "mRNA" for rna in raw_data.rnas]
    rnaData['isMiscRna'] = [
        rna["type"] == "miscRNA" for rna in raw_data.rnas
    ]
    rnaData['isRRna'] = [rna["type"] == "rRNA" for rna in raw_data.rnas]
    rnaData['isTRna'] = [rna["type"] == "tRNA" for rna in raw_data.rnas]
    rnaData['isRProtein'] = [
        "{}[c]".format(x) in sim_data.moleculeGroups.rProteins
        for x in monomerIds
    ]
    rnaData['isRnap'] = [
        "{}[c]".format(x) in sim_data.moleculeGroups.rnapIds
        for x in monomerIds
    ]
    rnaData['isRRna23S'] = is23S
    rnaData['isRRna16S'] = is16S
    rnaData['isRRna5S'] = is5S
    rnaData['sequence'] = sequences
    rnaData['geneId'] = geneIds
    rnaData['KmEndoRNase'] = Km

    field_units = {
        'id': None,
        # 'synthProb' : None,
        # 'expression' : None,
        'degRate': 1 / units.s,
        'length': units.nt,
        'countsACGU': units.nt,
        'mw': units.g / units.mol,
        'isMRna': None,
        'isMiscRna': None,
        'isRRna': None,
        'isTRna': None,
        'isRRna23S': None,
        'isRRna16S': None,
        'isRRna5S': None,
        'isRProtein': None,
        'isRnap': None,
        'sequence': None,
        'geneId': None,
        'KmEndoRNase': units.mol / units.L,
    }

    self.rnaExpression = {}
    self.rnaSynthProb = {}

    self.rnaExpression["basal"] = expression / expression.sum()
    self.rnaSynthProb["basal"] = synthProb / synthProb.sum()

    self.rnaData = UnitStructArray(rnaData, field_units)
def _buildMonomerData(self, raw_data, sim_data):
    """Build the per-protein-monomer property table in ``self.monomerData``.

    One row per entry of ``raw_data.proteins``: compartment-tagged id, the
    compartment-tagged id of its RNA, degradation rate, length, amino-acid
    counts (ordered like ``sim_data.amino_acid_1_to_3_ordered``), mass and
    sequence.

    Degradation rates come from the first residue of each sequence (fast
    or slow rate); ribosomal proteins always get the slow rate.

    Raises:
        AssertionError: if a protein, or the RNA matched to a protein,
            does not have exactly one location.
        KeyError: if a non-ribosomal protein starts with a residue that
            has no entry in the rate table.
    """
    assert all(
        [len(protein['location']) == 1 for protein in raw_data.proteins])
    ids = [
        '{}[{}]'.format(protein['id'], protein['location'][0])
        for protein in raw_data.proteins
    ]

    # Index RNAs by id once instead of rescanning the list per protein.
    # setdefault keeps the FIRST RNA per id, matching the former
    # scan-and-break lookup.
    rnaById = {}
    for rna in raw_data.rnas:
        rnaById.setdefault(rna['id'], rna)

    rnaIds = []
    for protein in raw_data.proteins:
        rnaId = protein['rnaId']

        # Stays None (formatted as "[None]") when no RNA has this id.
        rnaLocation = None
        rna = rnaById.get(rnaId)
        if rna is not None:
            assert len(rna['location']) == 1
            rnaLocation = rna['location'][0]

        rnaIds.append('{}[{}]'.format(rnaId, rnaLocation))

    # Iterating the mapping yields its keys in order; this replaces the
    # Python-2-only .viewkeys() and works on Python 2 and 3 alike.
    aminoAcids = list(sim_data.amino_acid_1_to_3_ordered)

    lengths = []
    aaCounts = []
    sequences = []
    for protein in raw_data.proteins:
        sequence = protein['seq']

        lengths.append(len(sequence))
        aaCounts.append([sequence.count(aa) for aa in aminoAcids])
        sequences.append(sequence)

    maxSequenceLength = max(len(seq) for seq in sequences)

    mws = np.array([protein['mw']
                    for protein in raw_data.proteins]).sum(axis=1)

    size = len(rnaIds)

    nAAs = len(aminoAcids)

    # Calculate degradation rates based on N-rule
    # TODO: citation
    fastRate = (np.log(2) / (2 * units.min)).asUnit(1 / units.s)
    slowRate = (np.log(2) / (10 * 60 * units.min)).asUnit(1 / units.s)

    fastAAs = ["R", "K", "F", "L", "W", "Y"]
    slowAAs = [
        "H", "I", "D", "E", "N", "Q", "C", "A", "S", "T", "G", "V", "M"
    ]
    noDataAAs = ["P", "U"]

    NruleDegRate = {}
    NruleDegRate.update((fastAA, fastRate) for fastAA in fastAAs)
    NruleDegRate.update((slowAA, slowRate) for slowAA in slowAAs)
    # Assumed slow rate because of no data
    NruleDegRate.update((noDataAA, slowRate) for noDataAA in noDataAAs)

    # Build the set of ribosomal proteins.
    # Give all ribosomal proteins the slowAA rule.
    # x[:-3] drops the last three characters of each group id (presumably
    # a compartment tag such as "[c]" -- confirm).
    ribosomalProteins = set()
    ribosomalProteins.update(
        x[:-3] for x in sim_data.moleculeGroups.s30_proteins)
    ribosomalProteins.update(
        x[:-3] for x in sim_data.moleculeGroups.s50_proteins)

    degRate = np.zeros(len(raw_data.proteins))
    for i, m in enumerate(raw_data.proteins):
        if m['id'] not in ribosomalProteins:
            degRate[i] = NruleDegRate[m['seq'][0]].asNumber()
        else:
            degRate[i] = slowRate.asNumber()

    monomerData = np.zeros(size,
                           dtype=[
                               ('id', 'a50'),
                               ('rnaId', 'a50'),
                               ('degRate', 'f8'),
                               ('length', 'i8'),
                               ('aaCounts', '{}i8'.format(nAAs)),
                               ('mw', 'f8'),
                               ('sequence',
                                'a{}'.format(maxSequenceLength)),
                           ])

    monomerData['id'] = ids
    monomerData['rnaId'] = rnaIds
    monomerData['degRate'] = degRate
    monomerData['length'] = lengths
    monomerData['aaCounts'] = aaCounts
    monomerData['mw'] = mws
    monomerData['sequence'] = sequences

    field_units = {
        'id': None,
        'rnaId': None,
        'degRate': 1 / units.s,
        'length': units.aa,
        'aaCounts': units.aa,
        'mw': units.g / units.mol,
        'sequence': None
    }

    self.monomerData = UnitStructArray(monomerData, field_units)
def test_getItem_slice(self):
    """Check that slicing returns a UnitStructArray over the sliced rows.

    The result must equal one built from the same slice of the struct
    array with the units dict unchanged.
    """
    sliced = self.us_array[:1]
    expected = UnitStructArray(self.struct_array[:1], self.units)
    self.assertEqual(sliced, expected)
def setUp(self):
    """Create the shared fixture: a zeroed 3-row array and its wrapper.

    The struct array has 'id' and 'mass' fields; 'mass' is tagged with
    ``g`` and 'id' carries no unit.
    """
    record_dtype = [('id', 'a10'), ('mass', np.float64)]
    self.struct_array = np.zeros(3, dtype=record_dtype)
    self.units = {'id': None, 'mass': g}
    self.us_array = UnitStructArray(self.struct_array, self.units)
class Test_unit_struct_array(unittest.TestCase):
    """Unit tests for UnitStructArray.

    Covers construction checks, field access, indexing and unit-checked
    assignment.
    """

    @classmethod
    def setUpClass(cls):
        pass

    @classmethod
    def tearDownClass(cls):
        pass

    def setUp(self):
        """Create a zeroed 3-row struct array and its unit-tagged wrapper."""
        self.struct_array = np.zeros(3,
                                     dtype=[('id', 'a10'),
                                            ('mass', np.float64)])
        self.units = {'id': None, 'mass': g}
        self.us_array = UnitStructArray(self.struct_array, self.units)

    def tearDown(self):
        pass

    @noseAttrib.attr('smalltest', 'unitstructarray')
    def test_init(self):
        """Each invalid constructor argument raises with its own message."""
        with self.assertRaises(Exception) as context:
            UnitStructArray(1., {'hello': 'goodbye'})
        self.assertEqual(
            context.exception.message,
            'UnitStructArray must be initialized with a numpy array!\n')

        with self.assertRaises(Exception) as context:
            UnitStructArray(self.struct_array, 'foo')
        self.assertEqual(
            context.exception.message,
            'UnitStructArray must be initialized with a dict storing units!\n')

        with self.assertRaises(Exception) as context:
            # Extra key with no matching struct-array field.
            self.units['hi'] = 'bye'
            UnitStructArray(self.struct_array, self.units)
        self.assertEqual(context.exception.message,
                         'Struct array fields do not match unit fields!\n')

    @noseAttrib.attr('smalltest', 'unitstructarray')
    def test_field(self):
        """Field access returns raw data for unitless fields and
        unit-tagged data for fields that have a unit."""
        # assertEqual, not assertTrue: assertTrue(a, b) treats `b` as the
        # failure message and would pass for any non-empty list `a`.
        self.assertEqual(self.us_array['id'].tolist(),
                         self.struct_array['id'].tolist())
        self.assertTrue(
            (self.us_array['mass'] == g * self.struct_array['mass']).all())

    @noseAttrib.attr('smalltest', 'unitstructarray')
    def test_fullArray(self):
        """fullArray() returns data equal to the wrapped struct array."""
        self.assertTrue((self.us_array.fullArray() == self.struct_array).all())

    @noseAttrib.attr('smalltest', 'unitstructarray')
    def test_fullUnits(self):
        """fullUnits() returns a dict equal to the one passed in."""
        self.assertEqual(self.us_array.fullUnits(), self.units)

    @noseAttrib.attr('smalltest', 'unitstructarray')
    def test_getItem_slice(self):
        """Slicing returns a UnitStructArray over the sliced rows."""
        self.assertEqual(self.us_array[:1],
                         UnitStructArray(self.struct_array[:1], self.units))

    @noseAttrib.attr('smalltest', 'unitstructarray')
    def test_getItem_indicies(self):
        """Integer-list and boolean-mask indexing return UnitStructArrays."""
        index = [0, 2]
        self.assertEqual(self.us_array[index],
                         UnitStructArray(self.struct_array[index], self.units))

        index = [True, False, True]
        self.assertEqual(self.us_array[index],
                         UnitStructArray(self.struct_array[index], self.units))

    @noseAttrib.attr('smalltest', 'unitstructarray')
    def test_getItem_singleindex(self):
        """A single integer index returns the underlying record."""
        self.assertEqual(self.us_array[0], self.struct_array[0])

    @noseAttrib.attr('smalltest', 'unitstructarray')
    def test_setItem_quantity_with_units(self):
        """Assigning to a unit-tagged field works with matching units and
        raises with mismatched units."""
        self.us_array['mass'] = g * np.array([1., 2., 3.])
        self.assertTrue(
            (self.us_array['mass'] == g * np.array([1., 2., 3.])).all())

        with self.assertRaises(Exception) as context:
            self.us_array['mass'] = mol * np.array([1., 2., 3.])
        self.assertEqual(context.exception.message, 'Units do not match!\n')

    @noseAttrib.attr('smalltest', 'unitstructarray')
    def test_setItem_quantity_no_units(self):
        """Unitless fields accept plain values; unit-tagged fields reject
        values that carry no unit."""
        self.us_array['id'] = ['nick', 'derek', 'john']
        self.assertTrue(
            (self.us_array['id'] == np.array(['nick', 'derek', 'john'])).all())

        with self.assertRaises(Exception) as context:
            self.us_array['mass'] = [1, 2, 3]
        self.assertEqual(
            context.exception.message,
            'Units do not match! Quantity has units your input does not!\n')