class RegulatoryModule(obj_tables.Model):
    """ Knowledge about regulatory modules

    Attributes:
        id (:obj:`str`): identifier (primary, unique slug)
        name (:obj:`str`): name
        gene (:obj:`GeneLocus`): gene
        promoter (:obj:`str`): promoter ensembl ID
        activity (:obj:`ActivityLevel`): cell-type specific activity level
        type (:obj:`RegulationType`): type of regulation (proximal or distal)
        transcription_factor_regulation (:obj:`TranscriptionFactorRegulation`):
            transcription factor and direction of regulation
        comments (:obj:`str`): comments
        references (:obj:`list` of :obj:`Reference`): references
        identifiers (:obj:`list` of :obj:`Identifier`): identifiers
    """

    id = obj_tables.SlugAttribute(primary=True, unique=True)
    name = obj_tables.StringAttribute()
    gene = obj_tables.ManyToOneAttribute(
        GeneLocus,
        related_name='regulatory_modules',
    )
    promoter = obj_tables.StringAttribute()
    activity = obj_tables.EnumAttribute(ActivityLevel)
    type = obj_tables.EnumAttribute(RegulationType)
    transcription_factor_regulation = RegDirectionAttribute(
        related_name='regulatory_modules',
    )
    comments = obj_tables.LongStringAttribute()
    references = obj_tables.ManyToManyAttribute(
        core.Reference,
        related_name='regulatory_modules',
    )
    identifiers = core.IdentifierAttribute(related_name='regulatory_modules')

    class Meta(obj_tables.Model.Meta):
        # Column order used when the model is laid out as a table.
        attribute_order = (
            'id',
            'name',
            'gene',
            'promoter',
            'activity',
            'type',
            'transcription_factor_regulation',
            'identifiers',
            'references',
            'comments',
        )
class PtmSite(core.PolymerLocus):
    """ Knowledge of protein modification sites

    Attributes:
        type (:obj:`str`): type of modification (phosphorylation,
            methylation, etc...)
        modified_protein (:obj:`ProteinSpeciesType`): modified protein
        modified_residue (:obj:`str`): residue name and position in protein
            sequence
        fractional_abundance (:obj:`float`): ratio of modified protein
            abundance
    """

    type = obj_tables.StringAttribute()
    modified_protein = obj_tables.ManyToOneAttribute(
        'ProteinSpeciesType',
        related_name='ptm_sites',
    )
    modified_residue = obj_tables.StringAttribute()
    fractional_abundance = obj_tables.FloatAttribute()

    class Meta(obj_tables.Model.Meta):
        # 'id', 'name', 'identifiers', 'references' and 'comments' are not
        # declared in this class; presumably they are inherited from
        # core.PolymerLocus -- confirm against that base class.
        attribute_order = (
            'id',
            'name',
            'modified_protein',
            'type',
            'modified_residue',
            'fractional_abundance',
            'identifiers',
            'references',
            'comments',
        )
class Transcript(obj_tables.Model):
    """ Transcript

    Attributes:
        id (:obj:`str`): identifier (primary, unique)
        gene (:obj:`Gene`): gene; the reverse accessor on the gene is
            ``transcripts``
        location (:obj:`Location`): location (one-to-one); the reverse
            accessor on the location is ``transcripts``
    """

    id = obj_tables.StringAttribute(
        primary=True,
        unique=True,
        verbose_name='Id',
    )
    gene = obj_tables.ManyToOneAttribute(
        'Gene',
        related_name='transcripts',
        verbose_name='Gene',
    )
    location = obj_tables.OneToOneAttribute(
        'Location',
        related_name='transcripts',
        verbose_name='Location',
    )

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        attribute_order = ('id', 'gene', 'location')
        verbose_name = 'Transcript'
        verbose_name_plural = 'Transcripts'
class Person(obj_tables.Model):
    """ Person

    Attributes:
        name (:obj:`str`): name (primary, unique)
        type (:obj:`str`): one of ``family``, ``friend`` or ``business``
        company (:obj:`Company`): company; the reverse accessor on the
            company is ``employees``
        email_address (:obj:`str`): email address
        phone_number (:obj:`str`): phone number
        address (:obj:`Address`): address (one-to-one); the reverse accessor
            on the address is ``person``
    """

    name = obj_tables.StringAttribute(
        primary=True,
        unique=True,
        verbose_name='Name',
    )
    type = obj_tables.EnumAttribute(
        ['family', 'friend', 'business'],
        verbose_name='Type',
    )
    company = obj_tables.ManyToOneAttribute(
        'Company',
        related_name='employees',
        verbose_name='Company',
    )
    email_address = obj_tables.EmailAttribute(verbose_name='Email address')
    phone_number = obj_tables.StringAttribute(verbose_name='Phone number')
    address = obj_tables.OneToOneAttribute(
        'Address',
        related_name='person',
        verbose_name='Address',
    )

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        attribute_order = (
            'name',
            'type',
            'company',
            'email_address',
            'phone_number',
            'address',
        )
        verbose_name = 'Person'
        verbose_name_plural = 'People'
class TranscriptSpeciesType(core.PolymerSpeciesType):
    """ Knowledge of a transcript (spliced RNA) species

    Attributes:
        gene (:obj:`GeneLocus`): gene
        exons (:obj:`list` of :obj:`LocusAttribute`): exon coordinates
        type (:obj:`TranscriptType`): type

    Related attributes:
        protein (:obj:`ProteinSpeciesType`): protein
    """

    gene = obj_tables.ManyToOneAttribute(GeneLocus, related_name='transcripts')
    exons = LocusAttribute(related_name='transcripts')
    type = obj_tables.EnumAttribute(TranscriptType)

    class Meta(obj_tables.Model.Meta):
        verbose_name = 'Transcript'
        verbose_name_plural = 'Transcripts'
        # 'id', 'name', 'identifiers', 'references' and 'comments' are not
        # declared in this class; presumably they are inherited from
        # core.PolymerSpeciesType -- confirm against that base class.
        attribute_order = (
            'id',
            'name',
            'gene',
            'exons',
            'type',
            'identifiers',
            'references',
            'comments',
        )

    def get_seq(self):
        """ Get the 5' to 3' sequence

        The exons are ordered by their start coordinate, cut out of the
        gene's polymer, concatenated, reverse-complemented when the gene is
        on the negative strand, and finally transcribed.

        Returns:
            :obj:`Bio.Seq.Seq`: sequence
        """
        exons_by_start = sorted(self.exons, key=lambda exon: exon.start)
        first_start = exons_by_start[0].start
        last_end = exons_by_start[-1].end

        # DNA spanning from the first exon's start to the last exon's end
        span_seq = self.gene.polymer.get_subseq(start=first_start, end=last_end)

        spliced = Bio.Seq.Seq('', alphabet=Bio.Alphabet.DNAAlphabet())
        for exon in exons_by_start:
            # Shift the exon coordinates so they index into `span_seq`.
            # NOTE(review): the `+ 1` suggests exon end coordinates are
            # inclusive -- confirm against the locus coordinate convention.
            lower = exon.start - first_start
            upper = exon.end - first_start + 1
            spliced += span_seq[lower:upper]

        if self.gene.strand == core.PolymerStrand.negative:
            spliced = spliced.reverse_complement()

        return spliced.transcribe()

    def get_empirical_formula(self, seq_input=None):
        """ Get the empirical formula for a transcript (spliced RNA) species with

        * 5' monophosphate
        * Deprotonated phosphate oxygens

        :math:`N_A * AMP + N_C * CMP + N_G * GMP + N_U * UMP - (L-1) * OH`

        Args:
            seq_input (:obj:`Bio.Seq.Seq`, optional): if provided, the method
                will use it instead of reading from fasta file to reduce IO
                operation; a falsy value (`None` or an empty sequence) is
                treated as not provided

        Returns:
            :obj:`chem.EmpiricalFormula`: empirical formula
        """
        seq = seq_input if seq_input else self.get_seq()

        # Upper-case once and count each nucleotide on the result
        bases = seq.upper()
        n_a = bases.count('A')
        n_c = bases.count('C')
        n_g = bases.count('G')
        n_u = bases.count('U')

        # (L - 1) term of the formula above
        n_removed_oh = len(seq) - 1

        formula = chem.EmpiricalFormula()
        formula.C = 10 * n_a + 9 * n_c + 10 * n_g + 9 * n_u
        formula.H = 12 * n_a + 12 * n_c + 12 * n_g + 11 * n_u - n_removed_oh
        formula.N = 5 * n_a + 3 * n_c + 5 * n_g + 2 * n_u
        formula.O = 7 * n_a + 8 * n_c + 8 * n_g + 9 * n_u - n_removed_oh
        formula.P = n_a + n_c + n_g + n_u

        return formula

    def get_charge(self, seq_input=None):
        """ Get the charge for a transcript (spliced RNA) species with

        * 5' monophosphate
        * Deprotonated phosphate oxygens

        :math:`-L - 1`

        Args:
            seq_input (:obj:`Bio.Seq.Seq`, optional): if provided, the method
                will use it instead of reading from fasta file to reduce IO
                operation; a falsy value (`None` or an empty sequence) is
                treated as not provided

        Returns:
            :obj:`int`: charge
        """
        seq = seq_input if seq_input else self.get_seq()
        return -len(seq) - 1

    def get_mol_wt(self, seq_input=None):
        """ Get the molecular weight for a transcript (spliced RNA) species with

        * 5' monophosphate
        * Deprotonated phosphate oxygens

        Args:
            seq_input (:obj:`Bio.Seq.Seq`, optional): if provided, the method
                will use it instead of reading from fasta file to reduce IO
                operation; a falsy value (`None` or an empty sequence) is
                treated as not provided

        Returns:
            :obj:`float`: molecular weight (Da)
        """
        # `get_empirical_formula` applies the same "falsy means not provided"
        # rule, so the argument can be forwarded unconditionally.
        formula = self.get_empirical_formula(seq_input=seq_input)
        return formula.get_molecular_weight()
class TranscriptionFactorRegulation(obj_tables.Model):
    """ Transcription factor and the direction of transcriptional regulation

    Attributes:
        transcription_factor (:obj:`ProteinSpeciesType`): transcription factor
        direction (:obj:`RegulatoryDirection`): regulatory direction

    Related attributes:
        regulatory_modules (:obj:`list` of `RegulatoryModule`): regulatory modules
    """

    transcription_factor = obj_tables.ManyToOneAttribute(
        'ProteinSpeciesType', related_name='transcription_factor_regulation')
    direction = obj_tables.EnumAttribute(RegulatoryDirection)

    class Meta(obj_tables.Model.Meta):
        attribute_order = ('transcription_factor', 'direction')
        frozen_columns = 1
        table_format = obj_tables.TableFormat.cell
        ordering = ('transcription_factor', 'direction')

    @staticmethod
    def _serialize(transcription_factor_id, direction_name):
        """ Generate string representation

        Args:
            transcription_factor_id (:obj:`str`): transcription factor id
            direction_name (:obj:`str`): regulatory direction name

        Returns:
            :obj:`str`: ``<transcription factor id>:<direction name>``
        """
        return '{}:{}'.format(transcription_factor_id, direction_name)

    def serialize(self):
        """ Generate string representation

        Returns:
            :obj:`str`: ``<transcription factor id>:<direction name>``
        """
        return self._serialize(self.transcription_factor.id,
                               self.direction.name)

    @classmethod
    def deserialize(cls, value, objects):
        """ Deserialize value

        The expected form is ``<transcription factor id>:<direction name>``,
        with optional spaces around the colon. An object that was already
        deserialized for the same transcription factor and direction is
        reused, whichever spelling of `value` produced it.

        Args:
            value (:obj:`str`): String representation
            objects (:obj:`dict`): dictionary of objects, grouped by model;
                a newly created object is added to it under its serialized
                value

        Returns:
            :obj:`tuple` of `TranscriptionFactorRegulation`, `InvalidAttribute`
                or `None`: tuple of cleaned value and cleaning error
        """
        # Fast path: `value` is already the canonical key of a known object
        if cls in objects and value in objects[cls]:
            return (objects[cls][value], None)

        # Strip the leading '^' and trailing '$' anchors of the id pattern so
        # that it can be embedded in the larger expression.
        tf_id_pattern = ProteinSpeciesType.id.pattern[1:-1]
        direction_pattern = '|'.join(rd.name for rd in RegulatoryDirection)
        pattern = r'^({}) *\: *({})$'.format(tf_id_pattern, direction_pattern)
        match = re.match(pattern, value, flags=re.I)
        if not match:
            return (None, obj_tables.InvalidAttribute(
                cls, ['Invalid transcription factor regulation']))

        errors = []

        # Transcription factor: must already exist among the parsed objects
        transcription_factor = None
        tf_reg_str = match.group(1)
        if ProteinSpeciesType in objects and \
                tf_reg_str in objects[ProteinSpeciesType]:
            transcription_factor = objects[ProteinSpeciesType][tf_reg_str]
        else:
            errors.append(
                'Undefined transcription factor "{}"'.format(tf_reg_str))

        # Direction: the pattern is matched case-insensitively, so resolve
        # the name with an exact match first and fall back to a
        # case-insensitive match when it is unambiguous.
        direction_str = match.group(match.lastindex)
        members = list(RegulatoryDirection)
        candidates = [rd for rd in members if rd.name == direction_str]
        if not candidates:
            lowered = direction_str.lower()
            candidates = [rd for rd in members if rd.name.lower() == lowered]
        direction = None
        if len(candidates) == 1:
            direction = candidates[0]
        else:
            errors.append('Undefined regulatory direction "{}"'.format(
                direction_str))

        if errors:
            return (None, obj_tables.InvalidAttribute(cls, errors))

        if cls not in objects:
            objects[cls] = {}

        # Reuse an existing object with the same canonical key instead of
        # creating (and overwriting the cache with) a duplicate when `value`
        # was written in a non-canonical way, e.g. with spaces or a
        # differently-cased direction.
        serialized_value = cls._serialize(transcription_factor.id,
                                          direction.name)
        if serialized_value in objects[cls]:
            return (objects[cls][serialized_value], None)

        obj = cls(transcription_factor=transcription_factor,
                  direction=direction)
        objects[cls][serialized_value] = obj
        return (obj, None)