class Gene(BaseDS):
    # Mapped class of the 'Gene' table. An entry is identified by its
    # gene_id (primary key) and also stores a chromosome value.

    __tablename__ = 'Gene'

    gene_id = Column(
        String(
            Constants.MAX_LEN_STRING,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    chromosome = Column(
        String(
            15,
            collation=SQLCollationManager.get_instance().get_db_collation()))

    # gene_id is the primary key of the table
    __table_args__ = (PrimaryKeyConstraint('gene_id'), )

    # One-to-many relationships; the related DSTranscript and GeneAlias
    # classes receive a 'Gene' back-reference attribute.
    DSTranscript_list = relationship('DSTranscript', cascade='all', backref='Gene')
    GeneAlias_list = relationship('GeneAlias', cascade='all', backref='Gene')

    ## __eq__
    #  ------
    #
    # Equality test between two Gene instances.
    # Two instances are equal when they are of exactly the same type and
    # share the same primary key (gene_id).
    #
    # @param other: Gene - The object to compare to the current one.
    #
    # @return Boolean - Are this object and 'other' equal?
    #
    def __eq__(self, other):
        # Objects of a different type are never equal to a Gene
        if type(other) is not type(self):
            return False
        return self.gene_id == other.gene_id

    ## __hash__
    #  --------
    #
    # Hash value of a Gene object, computed from its primary key so that
    # equal instances share the same hash.
    #
    # @return the hash value of the Gene object.
    #
    def __hash__(self):
        primary_key = self.gene_id
        return hash(primary_key)
class GeneAlias(BaseDS):
    # Mapped class of the 'GeneAlias' table. Each entry associates an alias
    # (and a crossref value) to a gene_id referencing the 'Gene' table.

    __tablename__ = 'GeneAlias'

    # Foreign key to Gene.gene_id; deletions and updates are cascaded
    gene_id = Column(
        String(
            Constants.MAX_LEN_STRING,
            collation=SQLCollationManager.get_instance().get_db_collation()),
        ForeignKey('Gene.gene_id', ondelete='CASCADE', onupdate='CASCADE'))
    alias = Column(
        String(
            Constants.MAX_LEN_STRING,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    crossref = Column(
        String(
            50,
            collation=SQLCollationManager.get_instance().get_db_collation()))

    # Composite primary key: (gene_id, alias)
    __table_args__ = (PrimaryKeyConstraint('gene_id', 'alias'), )

    ## __eq__
    #  ------
    #
    # Equality test between two GeneAlias instances.
    # Two instances are equal when they are of exactly the same type and
    # all the attributes of their composite primary key are equal.
    #
    # @param other: GeneAlias - The object to compare to the current one.
    #
    # @return Boolean - Are this object and 'other' equal?
    #
    def __eq__(self, other):
        # Objects of a different type are never equal to a GeneAlias
        if type(other) is not type(self):
            return False
        return (self.gene_id == other.gene_id) and (self.alias == other.alias)

    ## __hash__
    #  --------
    #
    # Hash value of a GeneAlias object, computed from the attributes of
    # its composite primary key.
    #
    # @return the hash value of the GeneAlias object.
    #
    def __hash__(self):
        primary_key = (self.gene_id, self.alias)
        return hash(primary_key)
class ORFAnnotationCatalog(BasePRO):
    # Mapped class of the 'ORFAnnotationCatalog' table. Each entry is
    # identified by its annotation (primary key) and stores a family value.

    __tablename__ = 'ORFAnnotationCatalog'

    annotation = Column(
        String(
            50,
            collation=SQLCollationManager.get_instance().get_db_collation()),
        primary_key=True)
    family = Column(
        String(
            50,
            collation=SQLCollationManager.get_instance().get_db_collation()))

    # One-to-many relationship; the ORFAnnotation class receives an
    # 'ORFAnnotationCatalog' back-reference attribute.
    ORFAnnotation_list = relationship(
        'ORFAnnotation',
        cascade='all',
        backref='ORFAnnotationCatalog')
class UTGeneFromAlias(BaseDS):
    # =============================================================================
    # /!\ This table is built from the data of the Gene and GeneAlias tables.
    #     It associates to each alias the list of genes that may correspond
    #     to it.
    # =============================================================================

    __tablename__ = 'UTGeneFromAlias'

    alias = Column(
        String(
            Constants.MAX_LEN_STRING,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    gene_ids = Column(
        Text(collation=SQLCollationManager.get_instance().get_db_collation()))

    # alias is the primary key of the table
    __table_args__ = (PrimaryKeyConstraint('alias'), )

    ## __eq__
    #  ------
    #
    # Equality test between two UTGeneFromAlias instances.
    # Two instances are equal when they are of exactly the same type and
    # share the same primary key (alias).
    #
    # @param other: UTGeneFromAlias - The object to compare to the current one.
    #
    # @return Boolean - Are this object and 'other' equal?
    #
    def __eq__(self, other):
        # Objects of a different type are never equal to a UTGeneFromAlias
        if type(other) is not type(self):
            return False
        return self.alias == other.alias

    ## __hash__
    #  --------
    #
    # Hash value of a UTGeneFromAlias object, computed from its primary key.
    #
    # @return the hash value of the UTGeneFromAlias object.
    #
    def __hash__(self):
        primary_key = self.alias
        return hash(primary_key)
class Metadata(BaseDS):
    # Mapped class of the 'Metadata' table. Each entry is identified by
    # its parameter (primary key) and stores a value and a description.

    __tablename__ = 'Metadata'

    parameter = Column(
        String(
            255,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    value = Column(
        String(
            255,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    description = Column(
        Text(collation=SQLCollationManager.get_instance().get_db_collation()))

    # parameter is the primary key of the table
    __table_args__ = (PrimaryKeyConstraint('parameter'), )

    ## __eq__
    #  ------
    #
    # Equality test between two Metadata instances.
    # Two instances are equal when they are of exactly the same type and
    # share the same primary key (parameter).
    #
    # @param other: Metadata - The object to compare to the current one.
    #
    # @return Boolean - Are this object and 'other' equal?
    #
    def __eq__(self, other):
        # Objects of a different type are never equal to a Metadata
        if type(other) is not type(self):
            return False
        return self.parameter == other.parameter

    ## __hash__
    #  --------
    #
    # Hash value of a Metadata object, computed from its primary key.
    #
    # @return the hash value of the Metadata object.
    #
    def __hash__(self):
        primary_key = self.parameter
        return hash(primary_key)
class ORFAnnotation(BasePRO):
    # Mapped class of the 'ORFAnnotation' table. Each entry links an
    # ORFTranscriptAsso entry to an annotation of the ORFAnnotationCatalog
    # table, together with a criteria value.

    __tablename__ = 'ORFAnnotation'

    # Foreign key to ORFTranscriptAsso.id; deletions and updates are cascaded
    orftranscriptasso_id = Column(
        Integer,
        ForeignKey('ORFTranscriptAsso.id',
                   ondelete='CASCADE',
                   onupdate='CASCADE'))
    # Foreign key to ORFAnnotationCatalog.annotation; deletions and updates
    # are cascaded
    orf_annotation = Column(
        String(
            50,
            collation=SQLCollationManager.get_instance().get_db_collation()),
        ForeignKey('ORFAnnotationCatalog.annotation',
                   ondelete='CASCADE',
                   onupdate='CASCADE'))
    criteria = Column(
        String(
            50,
            collation=SQLCollationManager.get_instance().get_db_collation()))

    # Composite primary key: (orftranscriptasso_id, orf_annotation, criteria)
    __table_args__ = (
        PrimaryKeyConstraint('orftranscriptasso_id',
                             'orf_annotation',
                             'criteria'),
    )
class PROMetadata(BasePRO):
    # Mapped class of the 'PROMetadata' table. Each entry is identified by
    # its parameter (primary key) and stores a value and a description.

    __tablename__ = 'PROMetadata'

    parameter = Column(
        String(
            255,
            collation=SQLCollationManager.get_instance().get_db_collation()),
        primary_key=True)
    value = Column(
        String(
            255,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    description = Column(
        Text(collation=SQLCollationManager.get_instance().get_db_collation()))

    ## __eq__
    #  ------
    #
    # Equality test between two PROMetadata instances.
    # Two instances are equal when they are of exactly the same type and
    # share the same primary key (parameter).
    #
    # @param other: PROMetadata - The object to compare to the current one.
    #
    # @return Boolean - Are this object and 'other' equal?
    #
    def __eq__(self, other):
        # Objects of a different type are never equal to a PROMetadata
        if type(other) is not type(self):
            return False
        return self.parameter == other.parameter

    ## __hash__
    #  --------
    #
    # Hash value of a PROMetadata object, computed from its primary key.
    #
    # @return the hash value of the PROMetadata object.
    #
    def __hash__(self):
        primary_key = self.parameter
        return hash(primary_key)
class UTBEDContent(BasePRO):
    # Mapped class of the 'UTBEDContent' table. Each entry is identified by
    # the id of an ORF entry (primary key and foreign key) and stores a
    # bed_index value and two text columns (bed_col and bed_add_col).

    __tablename__ = 'UTBEDContent'

    # Primary key and foreign key to ORF.id; deletions and updates are
    # cascaded
    orf_id = Column(
        Integer,
        ForeignKey('ORF.id', ondelete='CASCADE', onupdate='CASCADE'),
        primary_key=True)
    bed_index = Column(Integer)
    bed_col = Column(
        Text(
            SQLConstants.MAX_LEN_TEXT,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    bed_add_col = Column(
        Text(
            SQLConstants.MAX_LEN_TEXT,
            collation=SQLCollationManager.get_instance().get_db_collation()))
class SpeciesCatalog(BaseDS):
    # Mapped class of the 'SpeciesCatalog' table, which has a single
    # column: name (primary key).

    __tablename__ = 'SpeciesCatalog'

    name = Column(
        String(
            50,
            collation=SQLCollationManager.get_instance().get_db_collation()),
        primary_key=True)
class PROGeneAlias(BasePRO):
    # Mapped class of the 'PROGeneAlias' table. Each entry associates an
    # alias to a gene_id referencing the 'PROGene' table.

    __tablename__ = 'PROGeneAlias'

    # Foreign key to PROGene.gene_id; deletions and updates are cascaded
    gene_id = Column(
        String(
            Constants.MAX_LEN_STRING,
            collation=SQLCollationManager.get_instance().get_db_collation()),
        ForeignKey('PROGene.gene_id', ondelete='CASCADE', onupdate='CASCADE'))
    alias = Column(
        String(
            Constants.MAX_LEN_STRING,
            collation=SQLCollationManager.get_instance().get_db_collation()))

    # Composite primary key: (gene_id, alias)
    __table_args__ = (PrimaryKeyConstraint('gene_id', 'alias'), )
class CellContext(BasePRO):
    # Mapped class of the 'CellContext' table. Each entry links an
    # ORFTranscriptAsso entry to a context of the CellContextCatalog table.

    __tablename__ = 'CellContext'

    # Foreign key to ORFTranscriptAsso.id; deletions and updates are cascaded
    orftranscriptasso_id = Column(
        Integer,
        ForeignKey('ORFTranscriptAsso.id',
                   ondelete='CASCADE',
                   onupdate='CASCADE'))
    # Foreign key to CellContextCatalog.context; deletions and updates are
    # cascaded
    cell_context = Column(
        String(
            50,
            collation=SQLCollationManager.get_instance().get_db_collation()),
        ForeignKey('CellContextCatalog.context',
                   ondelete='CASCADE',
                   onupdate='CASCADE'))

    # Composite primary key: (orftranscriptasso_id, cell_context)
    __table_args__ = (
        PrimaryKeyConstraint('orftranscriptasso_id', 'cell_context'),
    )

    ## __eq__
    #  ------
    #
    # Equality test between two CellContext instances.
    # Two instances are equal when they are of exactly the same type and
    # all the attributes of their composite primary key
    # (orftranscriptasso_id, cell_context) are equal.
    #
    # @param other: CellContext - The object to compare to the current one.
    #
    # @return Boolean - Are this object and 'other' equal?
    #
    def __eq__(self, other):
        # Objects of a different type are never equal to a CellContext
        if type(other) is not type(self):
            return False
        return ((self.orftranscriptasso_id == other.orftranscriptasso_id)
                and (self.cell_context == other.cell_context))

    ## __hash__
    #  --------
    #
    # Hash value of a CellContext object, computed from the attributes of
    # its composite primary key.
    #
    # @return the hash value of the CellContext object.
    #
    def __hash__(self):
        primary_key = (self.orftranscriptasso_id, self.cell_context)
        return hash(primary_key)
class ORFTranscriptAssoDSAsso(BasePRO):
    # Mapped class of the 'ORFTranscriptAssoDSAsso' table. Each entry
    # associates an ORFTranscriptAsso entry to a dsorftranscriptasso_id
    # value and stores a data_source value.

    __tablename__ = 'ORFTranscriptAssoDSAsso'

    # Foreign key to ORFTranscriptAsso.id; deletions and updates are cascaded
    orftranscriptasso_id = Column(
        Integer,
        ForeignKey('ORFTranscriptAsso.id',
                   ondelete='CASCADE',
                   onupdate='CASCADE'))
    # Plain integer column (no foreign key is declared on it)
    dsorftranscriptasso_id = Column(Integer)
    data_source = Column(
        String(
            50,
            collation=SQLCollationManager.get_instance().get_db_collation()))

    # Composite primary key: (orftranscriptasso_id, dsorftranscriptasso_id)
    __table_args__ = (
        PrimaryKeyConstraint('orftranscriptasso_id',
                             'dsorftranscriptasso_id'),
    )
class ORFDSAsso(BasePRO):
    # Mapped class of the 'ORFDSAsso' table. Each entry associates an ORF
    # entry to a dsorf_id value and stores a data_source value and an
    # 'ambiguous' flag.

    __tablename__ = 'ORFDSAsso'

    # Foreign key to ORF.id; deletions and updates are cascaded
    orf_id = Column(
        Integer,
        ForeignKey('ORF.id', ondelete='CASCADE', onupdate='CASCADE'))
    # Plain integer column (no foreign key is declared on it)
    dsorf_id = Column(Integer)
    data_source = Column(
        String(
            50,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    ambiguous = Column(Boolean)

    # Composite primary key: (orf_id, dsorf_id)
    __table_args__ = (PrimaryKeyConstraint('orf_id', 'dsorf_id'), )
class UTRNABiotypeCatalog(BasePRO):
    # =============================================================================
    # /!\ This table is filled during the ComputeMissingInfo step.
    # =============================================================================

    __tablename__ = 'UTRNABiotypeCatalog'

    biotype = Column(
        String(
            255,
            collation=SQLCollationManager.get_instance().get_db_collation()),
        primary_key=True)

    ## __eq__
    #  ------
    #
    # Equality test between two UTRNABiotypeCatalog instances.
    # Two instances are equal when they are of exactly the same type and
    # share the same primary key (biotype).
    #
    # @param other: UTRNABiotypeCatalog - The object to compare to the
    #                                     current one.
    #
    # @return Boolean - Are this object and 'other' equal?
    #
    def __eq__(self, other):
        # Objects of a different type are never equal to a UTRNABiotypeCatalog
        if type(other) is not type(self):
            return False
        return self.biotype == other.biotype

    ## __hash__
    #  --------
    #
    # Hash value of a UTRNABiotypeCatalog object, computed from its
    # primary key.
    #
    # @return the hash value of the UTRNABiotypeCatalog object.
    #
    def __hash__(self):
        primary_key = self.biotype
        return hash(primary_key)
class CellContextCatalog(BasePRO):
    # Mapped class of the 'CellContextCatalog' table, which has a single
    # column: context (primary key).

    __tablename__ = 'CellContextCatalog'

    context = Column(
        String(
            50,
            collation=SQLCollationManager.get_instance().get_db_collation()),
        primary_key=True)

    # One-to-many relationship; the CellContext class receives a
    # 'CellContextCatalog' back-reference attribute.
    CellContext_list = relationship(
        'CellContext',
        cascade='all',
        backref='CellContextCatalog')

    ## __eq__
    #  ------
    #
    # Equality test between two CellContextCatalog instances.
    # Two instances are equal when they are of exactly the same type and
    # share the same primary key (context).
    #
    # @param other: CellContextCatalog - The object to compare to the
    #                                    current one.
    #
    # @return Boolean - Are this object and 'other' equal?
    #
    def __eq__(self, other):
        # Objects of a different type are never equal to a CellContextCatalog
        if type(other) is not type(self):
            return False
        return self.context == other.context

    ## __hash__
    #  --------
    #
    # Hash value of a CellContextCatalog object, computed from its
    # primary key.
    #
    # @return the hash value of the CellContextCatalog object.
    #
    def __hash__(self):
        primary_key = self.context
        return hash(primary_key)
class TranscriptDSAsso(BasePRO):
    # Mapped class of the 'TranscriptDSAsso' table. Each entry associates a
    # Transcript entry to a dstranscript_id value and stores a data_source
    # value and an 'ambiguous' flag.

    __tablename__ = 'TranscriptDSAsso'

    # Foreign key to Transcript.id; deletions and updates are cascaded
    transcript_id = Column(
        Integer,
        ForeignKey('Transcript.id', ondelete='CASCADE', onupdate='CASCADE'))
    # Plain integer column (no foreign key is declared on it)
    dstranscript_id = Column(Integer)
    data_source = Column(
        String(
            50,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    ambiguous = Column(Boolean)

    # Composite primary key: (transcript_id, dstranscript_id)
    __table_args__ = (
        PrimaryKeyConstraint('transcript_id', 'dstranscript_id'),
    )
class Transcript(BasePRO):
    # Mapped class of the 'Transcript' table. An entry is identified by its
    # integer id (primary key); the (transcript_id, gene_id) pair is
    # additionally constrained to be unique.

    __tablename__ = 'Transcript'

    id = Column(Integer)
    transcript_id = Column(
        String(
            255,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    transcript_name = Column(
        String(
            255,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    # Foreign key to PROGene.gene_id; deletions and updates are cascaded
    gene_id = Column(
        String(
            Constants.MAX_LEN_STRING,
            collation=SQLCollationManager.get_instance().get_db_collation()),
        ForeignKey('PROGene.gene_id', ondelete='CASCADE', onupdate='CASCADE'))
    strand = Column(
        String(
            2,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    start_pos = Column(Integer)
    end_pos = Column(Integer)
    sequence = Column(
        Text(
            SQLConstants.MAX_LEN_TEXT,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    cds_start_pos = Column(Integer)
    cds_stop_pos = Column(Integer)
    cds_genomic_length = Column(Integer)
    rel_cds_start_pos = Column(Integer)
    rel_cds_stop_pos = Column(Integer)
    rna_biotype = Column(
        String(
            100,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    count_ds = Column(Integer)
    count_ds_ambiguous = Column(Integer)

    # Primary key on id, and unique constraint on the "primary key-like"
    # attributes (transcript_id, gene_id)
    __table_args__ = (
        PrimaryKeyConstraint('id'),
        UniqueConstraint('transcript_id', 'gene_id'),
    )

    # One-to-many relationships; the TranscriptDSAsso and ORFTranscriptAsso
    # classes receive a 'Transcript' back-reference attribute.
    TranscriptDSAsso_list = relationship(
        'TranscriptDSAsso',
        cascade='all',
        backref='Transcript')
    ORFTranscriptAsso_list = relationship(
        'ORFTranscriptAsso',
        cascade='all',
        backref='Transcript')
class FLOSSClass(BasePRO):
    # Mapped class of the 'FLOSSClass' table. Each entry links an
    # ORFTranscriptAsso entry to a floss_class of the FLOSSClassCatalog
    # table.

    __tablename__ = 'FLOSSClass'

    # Foreign key to ORFTranscriptAsso.id; deletions and updates are cascaded
    orftranscriptasso_id = Column(
        Integer,
        ForeignKey('ORFTranscriptAsso.id',
                   ondelete='CASCADE',
                   onupdate='CASCADE'))
    # Foreign key to FLOSSClassCatalog.floss_class; deletions and updates
    # are cascaded
    floss_class = Column(
        String(
            100,
            collation=SQLCollationManager.get_instance().get_db_collation()),
        ForeignKey('FLOSSClassCatalog.floss_class',
                   ondelete='CASCADE',
                   onupdate='CASCADE'))

    # Composite primary key: (orftranscriptasso_id, floss_class)
    __table_args__ = (
        PrimaryKeyConstraint('orftranscriptasso_id', 'floss_class'),
    )
class ProvidedCategory(BasePRO):
    # Mapped class of the 'ProvidedCategory' table. Each entry links an
    # ORFTranscriptAsso entry to a category of the ProvidedCategoryCatalog
    # table.

    __tablename__ = 'ProvidedCategory'

    # Foreign key to ORFTranscriptAsso.id; deletions and updates are cascaded
    orftranscriptasso_id = Column(
        Integer,
        ForeignKey('ORFTranscriptAsso.id',
                   ondelete='CASCADE',
                   onupdate='CASCADE'))
    # Foreign key to ProvidedCategoryCatalog.category; deletions and updates
    # are cascaded
    provided_category = Column(
        String(
            50,
            collation=SQLCollationManager.get_instance().get_db_collation()),
        ForeignKey('ProvidedCategoryCatalog.category',
                   ondelete='CASCADE',
                   onupdate='CASCADE'))

    # Composite primary key: (orftranscriptasso_id, provided_category)
    __table_args__ = (
        PrimaryKeyConstraint('orftranscriptasso_id', 'provided_category'),
    )
class ORFTranscriptAsso(BasePRO):
    # Mapped class of the 'ORFTranscriptAsso' table. An entry associates an
    # ORF entry to a Transcript entry; it is identified by its integer id
    # (primary key) and the (orf_id, transcript_id) pair is additionally
    # constrained to be unique. For several features the table stores the
    # *_min, *_max and *_median columns together with a *_values text
    # column.

    __tablename__ = 'ORFTranscriptAsso'

    id = Column(Integer, autoincrement=True)

    # Foreign keys to ORF.id and Transcript.id; deletions and updates are
    # cascaded
    orf_id = Column(
        Integer,
        ForeignKey('ORF.id', ondelete='CASCADE', onupdate='CASCADE'))
    transcript_id = Column(
        Integer,
        ForeignKey('Transcript.id', ondelete='CASCADE', onupdate='CASCADE'))

    rel_start_pos = Column(Integer)
    rel_stop_pos = Column(Integer)
    predicted = Column(Boolean)
    ribo_seq = Column(Boolean)
    ms_info = Column(Boolean)

    start_codon_seq = Column(
        String(
            3,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    start_flanking_seq = Column(
        String(
            20,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    sequence_nt = Column(
        Text(
            SQLConstants.MAX_LEN_TEXT,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    sequence_aa = Column(
        Text(
            SQLConstants.MAX_LEN_TEXT,
            collation=SQLCollationManager.get_instance().get_db_collation()))

    kozak_context = Column(Boolean)
    kozak_context_comp = Column(
        String(
            30,
            collation=SQLCollationManager.get_instance().get_db_collation()))

    # Length in nucleotides
    length_nt_min = Column(Integer)
    length_nt_max = Column(Integer)
    length_nt_median = Column(Float)
    length_nt_values = Column(
        Text(
            SQLConstants.MAX_LEN_TEXT,
            collation=SQLCollationManager.get_instance().get_db_collation()))

    # Length in amino acids
    length_aa_min = Column(Integer)
    length_aa_max = Column(Integer)
    length_aa_median = Column(Float)
    length_aa_values = Column(
        Text(
            SQLConstants.MAX_LEN_TEXT,
            collation=SQLCollationManager.get_instance().get_db_collation()))

    # ORF score
    orf_score_min = Column(Float)
    orf_score_max = Column(Float)
    orf_score_median = Column(Float)
    orf_score_values = Column(
        Text(
            SQLConstants.MAX_LEN_TEXT,
            collation=SQLCollationManager.get_instance().get_db_collation()))

    # PhyloCSF
    phylocsf_min = Column(Float)
    phylocsf_max = Column(Float)
    phylocsf_median = Column(Float)
    phylocsf_values = Column(
        Text(
            SQLConstants.MAX_LEN_TEXT,
            collation=SQLCollationManager.get_instance().get_db_collation()))

    # PhastCons
    phastcons_min = Column(Float)
    phastcons_max = Column(Float)
    phastcons_median = Column(Float)
    phastcons_values = Column(
        Text(
            SQLConstants.MAX_LEN_TEXT,
            collation=SQLCollationManager.get_instance().get_db_collation()))

    # FLOSS
    floss_min = Column(Float)
    floss_max = Column(Float)
    floss_median = Column(Float)
    floss_values = Column(
        Text(
            SQLConstants.MAX_LEN_TEXT,
            collation=SQLCollationManager.get_instance().get_db_collation()))

    count_ds = Column(Integer)
    count_computed_clusters = Column(Integer)
    count_prov_lengths = Column(Integer)
    computed_clusters = Column(
        Text(
            SQLConstants.MAX_LEN_TEXT,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    gen_len_eq_orf_len = Column(Boolean)

    # Primary key on id, and unique constraint on the "primary key-like"
    # attributes (orf_id, transcript_id)
    __table_args__ = (
        PrimaryKeyConstraint('id'),
        UniqueConstraint('orf_id', 'transcript_id'),
    )

    # One-to-many relationships.
    # NOTE(review): the first four relationships name their back-reference
    # 'ORFTranscriptAssoDSAsso' whereas the last two use 'ORFTranscriptAsso'.
    # These names are the attributes exposed on the child classes, so they
    # are kept as they are - confirm whether the difference is intended.
    ORFTranscriptAssoDSAsso_list = relationship(
        'ORFTranscriptAssoDSAsso',
        cascade='all',
        backref='ORFTranscriptAssoDSAsso')
    CellContext_list = relationship(
        'CellContext',
        cascade='all',
        backref='ORFTranscriptAssoDSAsso')
    ProvidedCategory_list = relationship(
        'ProvidedCategory',
        cascade='all',
        backref='ORFTranscriptAssoDSAsso')
    FLOSSClass_list = relationship(
        'FLOSSClass',
        cascade='all',
        backref='ORFTranscriptAssoDSAsso')
    ORFCategory_list = relationship(
        'ORFCategory',
        cascade='all',
        backref='ORFTranscriptAsso')
    ORFAnnotation_list = relationship(
        'ORFAnnotation',
        cascade='all',
        backref='ORFTranscriptAsso')
class DSORFTranscriptAsso(BaseDS):
    # Mapped class of the 'DSORFTranscriptAsso' table. An entry associates
    # a DSORF entry to a DSTranscript entry for a given DataSource; it is
    # identified by its integer id (primary key) and a unique constraint is
    # set on its "primary key-like" attributes (data_source, transcript_id,
    # uniq_orf_id, predicted, ribo_seq, cell_context).

    __tablename__ = 'DSORFTranscriptAsso'

    id = Column(Integer)

    # Foreign keys to DataSource.name, DSTranscript.id and DSORF.id;
    # deletions and updates are cascaded
    data_source = Column(
        String(
            50,
            collation=SQLCollationManager.get_instance().get_db_collation()),
        ForeignKey('DataSource.name', ondelete='CASCADE', onupdate='CASCADE'))
    transcript_id = Column(
        Integer,
        ForeignKey('DSTranscript.id', ondelete='CASCADE', onupdate='CASCADE'))
    uniq_orf_id = Column(
        Integer,
        ForeignKey('DSORF.id', ondelete='CASCADE', onupdate='CASCADE'))

    predicted = Column(Boolean)
    ribo_seq = Column(Boolean)
    cell_context = Column(
        String(
            255,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    orf_id = Column(
        String(
            255,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    start_codon_seq = Column(
        String(
            3,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    raw_sequence = Column(
        Text(
            SQLConstants.MAX_LEN_TEXT,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    raw_sequence_aa = Column(
        Text(
            SQLConstants.MAX_LEN_TEXT,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    kozak_context = Column(
        String(
            100,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    orf_length_nt = Column(Integer)
    orf_length = Column(Integer)
    provided_category = Column(
        String(
            100,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    ms_info = Column(Boolean)
    orf_score = Column(Float)
    phylocsf = Column(Float)
    phastcons = Column(Float)
    floss = Column(Float)
    floss_class = Column(
        String(
            100,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    gen_len_eq_orf_len = Column(Boolean)

    # Primary key on id, and unique constraint on the "primary key-like"
    # attributes
    __table_args__ = (
        PrimaryKeyConstraint('id'),
        UniqueConstraint('data_source', 'transcript_id', 'uniq_orf_id',
                         'predicted', 'ribo_seq', 'cell_context'),
    )

    ## __eq__
    #  ------
    #
    # Equality test between two DSORFTranscriptAsso instances.
    # Two instances are equal when they are of exactly the same type and
    # all their "primary key-like" attributes (i.e. the attributes of the
    # unique constraint) are equal.
    #
    # @param other: DSORFTranscriptAsso - The object to compare to the
    #                                     current one.
    #
    # @return Boolean - Are this object and 'other' equal?
    #
    def __eq__(self, other):
        # Objects of a different type are never equal to a DSORFTranscriptAsso
        if type(other) is not type(self):
            return False

        compared_attributes = ('uniq_orf_id', 'transcript_id', 'data_source',
                               'cell_context', 'predicted', 'ribo_seq')
        return all(getattr(self, name) == getattr(other, name)
                   for name in compared_attributes)

    ## __hash__
    #  --------
    #
    # Hash value of a DSORFTranscriptAsso object, computed from its
    # "primary key-like" attributes (i.e. the attributes of the unique
    # constraint).
    #
    # @return the hash value of the DSORFTranscriptAsso object.
    #
    def __hash__(self):
        unique_key = (self.data_source,
                      self.transcript_id,
                      self.uniq_orf_id,
                      self.predicted,
                      self.ribo_seq,
                      self.cell_context)
        return hash(unique_key)

    ## update
    #  ------
    #
    # Merges into the current object the non-primary-key attributes of
    # another instance considered "equal" to it (i.e. sharing the same
    # "primary key-like" attributes).
    #
    # For each checked attribute provided (non-empty) by the other object:
    #   - If the current object does not provide it, the value of the other
    #     object is copied.
    #   - If both objects provide different values, the conflict is solved
    #     depending on the attribute:
    #       - provided_category, floss_class: the value is set to
    #         Constants.DENCELLORFOBJ_AMBIGUOUS_ATT.
    #       - raw_sequence, raw_sequence_aa: the value is replaced by the
    #         consensus of the two sequences.
    #       - start_codon_seq, kozak_context, orf_length, orf_length_nt,
    #         ms_info, orf_score, phylocsf, phastcons, floss: the value is
    #         set to None.
    #       - orf_id, gen_len_eq_orf_len: the current value is kept and the
    #         conflict is not reported.
    #
    # The current object is always updated before the exception is raised.
    #
    # @param other: DSORFTranscriptAsso - Another DSORFTranscriptAsso object
    #                                     "equal" to the current one.
    #
    # @throw DenCellORFObjUpdateException: When at least one attribute has
    #                                      been reset because of a conflict.
    #                                      The message lists, for each of
    #                                      these attributes, the two values
    #                                      found and the value finally set.
    #
    def update(self, other):

        # Non-primary-key attributes to merge, in the order they are processed
        attributes_to_check = (
            'orf_id', 'raw_sequence', 'raw_sequence_aa', 'start_codon_seq',
            'kozak_context', 'orf_length_nt', 'orf_length',
            'provided_category', 'ms_info', 'orf_score', 'phylocsf',
            'phastcons', 'floss', 'floss_class', 'gen_len_eq_orf_len')

        # Attributes reset to None when the two values are different
        att_to_none_if_conflict = (
            'start_codon_seq', 'kozak_context', 'orf_length', 'orf_length_nt',
            'ms_info', 'orf_score', 'phylocsf', 'phastcons', 'floss')

        # Attributes flagged as 'AMBIGUOUS' when the two values are different
        att_to_flag_if_conflict = ('provided_category', 'floss_class')

        # Attributes storing sequences (a consensus is computed on conflict)
        att_sequences = ('raw_sequence', 'raw_sequence_aa')

        # Attribute name -> (value of self, value of other, value finally set)
        conflicting_att = {}

        for att in attributes_to_check:

            self_att = getattr(self, att)
            other_att = getattr(other, att)

            self_att_is_empty = GeneralUtil.is_empty(self_att)
            other_att_is_empty = GeneralUtil.is_empty(other_att)

            # Nothing can be learnt from an attribute the other object
            # does not provide
            if other_att_is_empty:
                continue

            # Fill the missing information using the other object
            if self_att_is_empty:
                setattr(self, att, other_att)
                continue

            # Both objects provide the same value
            if other_att == self_att:
                continue

            # Both objects provide different values: get the new value
            if att in att_to_flag_if_conflict:
                resolved_value = Constants.DENCELLORFOBJ_AMBIGUOUS_ATT

            elif att in att_sequences:
                sqce_type = (Constants.SEQUENCE_TYPE_DNA
                             if att == 'raw_sequence'
                             else Constants.SEQUENCE_TYPE_PROT)
                # The threshold is set to 1 as the same DSORFTranscriptAsso
                # may be updated several times, but sequentially. This
                # allows to always get a N/X as soon as there is an
                # ambiguity for one of the nucleotides / amino acids.
                resolved_value = GeneticsUtil.find_sqce_consensus(
                    list_of_sequences=[self_att, other_att],
                    sqce_type=sqce_type,
                    threshold=1)

            elif att in att_to_none_if_conflict:
                resolved_value = None

            else:
                # No rule is defined for this attribute: keep the current
                # value and do not report the conflict
                continue

            # Apply the new value and keep track of the conflict
            setattr(self, att, resolved_value)
            conflicting_att[att] = (self_att, other_att, getattr(self, att))

        # Report all the conflicts at once
        if conflicting_att:
            conflict_descriptions = []
            for att, (self_val, other_val, new_val) in conflicting_att.items():
                conflict_descriptions.append(''.join([
                    'the attribute ', att,
                    ' has been found with the values "', str(self_val),
                    '" and "', str(other_val), '",',
                    ' hence its value has been set to "', str(new_val), '"']))
            raise DenCellORFObjUpdateException(
                ', '.join(conflict_descriptions) + '.')
class UTDSTranscriptGeneConflict(BaseDS):
    # =============================================================================
    # /!\ This table is built during the insertion of data. It associates to
    #     each unique (transcript_id, data_source) pair for which a conflict
    #     about the related Gene entry has been found, the list of gene IDs.
    #     This allows to manage the cases where conflicts about the gene are
    #     found several times for a particular DSTranscript entry.
    # =============================================================================

    __tablename__ = 'UTDSTranscriptGeneConflict'

    transcript_id = Column(
        String(
            255,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    data_source = Column(
        String(
            50,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    gene_ids = Column(
        Text(collation=SQLCollationManager.get_instance().get_db_collation()))

    # Composite primary key: (transcript_id, data_source)
    __table_args__ = (PrimaryKeyConstraint('transcript_id', 'data_source'), )

    ## __eq__
    #  ------
    #
    # Equality test between two UTDSTranscriptGeneConflict instances.
    # Two instances are equal when they are of exactly the same type and
    # all the attributes of their composite primary key are equal.
    #
    # @param other: UTDSTranscriptGeneConflict - The object to compare to
    #                                            the current one.
    #
    # @return Boolean - Are this object and 'other' equal?
    #
    def __eq__(self, other):
        # Objects of a different type are never equal to a
        # UTDSTranscriptGeneConflict
        if type(other) is not type(self):
            return False
        return ((self.transcript_id == other.transcript_id)
                and (self.data_source == other.data_source))

    ## __hash__
    #  --------
    #
    # Hash value of a UTDSTranscriptGeneConflict object, computed from the
    # attributes of its composite primary key.
    #
    # @return the hash value of the UTDSTranscriptGeneConflict object.
    #
    def __hash__(self):
        primary_key = (self.transcript_id, self.data_source)
        return hash(primary_key)
class DataSource(BaseDS):
    # Mapped class of the 'DataSource' table. An entry is identified by its
    # name (primary key) and stores the doi, description, url,
    # annotation_version, ensembl_release and annotation_description
    # columns.

    __tablename__ = 'DataSource'

    name = Column(
        String(
            50,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    doi = Column(
        String(
            50,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    description = Column(
        Text(collation=SQLCollationManager.get_instance().get_db_collation()))
    url = Column(
        String(
            255,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    annotation_version = Column(
        String(
            10,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    ensembl_release = Column(
        String(
            3,
            collation=SQLCollationManager.get_instance().get_db_collation()))
    # NOTE(review): unlike the other text columns, no collation is set on
    # this one - confirm whether this is intended.
    annotation_description = Column(Text)

    # name is the primary key of the table
    __table_args__ = (PrimaryKeyConstraint('name'), )

    # One-to-many relationships; the DSORF, DSTranscript and
    # DSORFTranscriptAsso classes receive a 'DataSource' back-reference
    # attribute.
    DSORF_list = relationship('DSORF', cascade='all', backref='DataSource')
    DSTranscript_list = relationship(
        'DSTranscript',
        cascade='all',
        backref='DataSource')
    DSORFTranscriptAsso_list = relationship(
        'DSORFTranscriptAsso',
        cascade='all',
        backref='DataSource')

    ## __eq__
    #  ------
    #
    # Equality test between two DataSource instances.
    # Two instances are equal when they are of exactly the same type and
    # share the same primary key (name).
    #
    # @param other: DataSource - The object to compare to the current one.
    #
    # @return Boolean - Are this object and 'other' equal?
    #
    def __eq__(self, other):
        # Objects of a different type are never equal to a DataSource
        if type(other) is not type(self):
            return False
        return self.name == other.name

    ## __hash__
    #  --------
    #
    # Hash value of a DataSource object, computed from its primary key.
    #
    # @return the hash value of the DataSource object.
    #
    def __hash__(self):
        primary_key = self.name
        return hash(primary_key)
class ORF(BasePRO):

    # Entry of the 'ORF' table. An entry is identified either by its integer
    # id (primary key) or by the combination of attributes on which a unique
    # constraint is defined (chromosome, strand, start and stop positions,
    # and splicing attributes).

    __tablename__ = 'ORF'

    id = Column(Integer)
    chromosome = Column(
        String(15,
               collation=SQLCollationManager.get_instance().get_db_collation()))
    strand = Column(
        String(2,
               collation=SQLCollationManager.get_instance().get_db_collation()))
    start_pos = Column(Integer)
    stop_pos = Column(Integer)
    spliced = Column(Boolean)
    spliced_parts_count = Column(Integer)
    splice_starts = Column(
        String(Constants.MAX_LEN_STRING,
               collation=SQLCollationManager.get_instance().get_db_collation()))
    splice_ends = Column(
        String(Constants.MAX_LEN_STRING,
               collation=SQLCollationManager.get_instance().get_db_collation()))
    sequence = Column(
        Text(SQLConstants.MAX_LEN_TEXT,
             collation=SQLCollationManager.get_instance().get_db_collation()))
    sequence_aa = Column(
        Text(SQLConstants.MAX_LEN_TEXT,
             collation=SQLCollationManager.get_instance().get_db_collation()))
    genomic_length = Column(Integer)
    count_ds = Column(Integer)
    count_ds_ambiguous = Column(Integer)

    # Define the primary key and the unique constraint on the "primary key-like" attributes
    __table_args__ = (
        PrimaryKeyConstraint('id'),
        UniqueConstraint('chromosome', 'strand', 'start_pos', 'stop_pos',
                         'spliced', 'splice_starts', 'splice_ends',
                         'spliced_parts_count'),
    )

    # Define the one-to-many relationships
    ORFDSAsso_list = relationship('ORFDSAsso',
                                  cascade='all',
                                  backref='ORF')
    ORFTranscriptAsso_list = relationship('ORFTranscriptAsso',
                                          cascade='all',
                                          backref='ORF')
    # Scalar relationship (uselist=False)
    UTBEDContent = relationship('UTBEDContent',
                                cascade='all',
                                backref='ORF',
                                uselist=False)

    ## __eq__
    #  ------
    #
    # Tests the equality between two instances of this class.
    # Two instances are considered equal if they are exactly of the same class and
    # if either:
    #   - they share the same id, provided this id has actually been set
    #     (two instances whose id is still None are NOT considered equal
    #     on this sole basis),
    #   - or their "primary key-like" attributes (i.e. the attributes with
    #     unique constraint) are all equal.
    #
    # NB: __hash__ only uses the "primary key-like" attributes. Hence two
    #     instances that are equal only because they share the same id may
    #     have different hash values. The comparison on the id is kept for
    #     backward compatibility.
    #
    # @param other: ORF - Another ORF object to compare to this object.
    #
    # @return Boolean - Are this object and 'other' equal?
    #
    def __eq__(self, other):

        # Check if other object is of the same class
        if (type(other) != type(self)):
            return False

        # Same primary key. The id is ignored when it has not been set yet,
        # otherwise any two instances without id would be found equal
        # whatever their coordinates.
        if ((self.id is not None) and (self.id == other.id)):
            return True

        # Same "primary key-like" attributes
        if ((self.chromosome == other.chromosome)
                and (self.strand == other.strand)
                and (self.start_pos == other.start_pos)
                and (self.stop_pos == other.stop_pos)
                and (self.spliced == other.spliced)
                and (self.spliced_parts_count == other.spliced_parts_count)
                and (self.splice_starts == other.splice_starts)
                and (self.splice_ends == other.splice_ends)):
            return True

        return False

    ## __hash__
    #  --------
    #
    # Returns the hash value of a ORF object.
    # The hash value of an instance is computed using its "primary key-like" attributes
    # (i.e. the attributes with unique constraint). The id is not used.
    #
    # @return the hash value of the ORF object.
    #
    def __hash__(self):

        return hash((self.chromosome,
                     self.strand,
                     self.start_pos,
                     self.stop_pos,
                     self.spliced,
                     self.spliced_parts_count,
                     self.splice_starts,
                     self.splice_ends))
class DSTranscript( BaseDS ):

    # Entry of the 'DSTranscript' table. An entry is identified either by its
    # integer id (primary key) or by the ( transcript_id, data_source ) pair
    # on which a unique constraint is defined.

    __tablename__ = 'DSTranscript'

    id = Column( Integer )
    transcript_id = Column( String( 255,
                                    collation = SQLCollationManager.get_instance().get_db_collation() ) )
    data_source = Column( String( 50,
                                  collation = SQLCollationManager.get_instance().get_db_collation() ),
                          ForeignKey( 'DataSource.name',
                                      ondelete='CASCADE',
                                      onupdate='CASCADE' ) )
    gene_id = Column( String( Constants.MAX_LEN_STRING,
                              collation = SQLCollationManager.get_instance().get_db_collation() ),
                      ForeignKey( 'Gene.gene_id',
                                  ondelete='CASCADE',
                                  onupdate='CASCADE' ) )
    raw_strand = Column( String( 2,
                                 collation = SQLCollationManager.get_instance().get_db_collation() ) )
    raw_start_pos = Column( Integer )
    raw_end_pos = Column( Integer )
    strand = Column( String( 2,
                             collation = SQLCollationManager.get_instance().get_db_collation() ) )
    start_pos = Column( Integer )
    end_pos = Column( Integer )
    raw_cds_start_pos = Column( Integer )
    raw_cds_stop_pos = Column( Integer )
    cds_start_pos = Column( Integer )
    cds_stop_pos = Column( Integer )
    rna_biotype = Column( String( 255,
                                  collation=SQLCollationManager.get_instance().get_db_collation() ) )

    # Define the primary key and the unique constraint on the "primary key-like" attributes
    __table_args__ = (
        PrimaryKeyConstraint( 'id' ),
        UniqueConstraint( 'transcript_id', 'data_source' ),
    )

    # Define the one-to-many relationship
    DSORFTranscriptAsso_list = relationship( 'DSORFTranscriptAsso',
                                             cascade = 'all',
                                             backref = 'DSTranscript' )

    ## __eq__
    #  ------
    #
    # Tests the equality between two instances of this class.
    # Two instances are considered equal if they are exactly of the same class
    # and if their "primary key-like" attributes (i.e. the attributes with
    # unique constraint: transcript_id and data_source) are all equal.
    #
    # @param other: DSTranscript - Another DSTranscript object to compare to this object.
    #
    # @return Boolean - Are this object and the other one equal?
    #
    def __eq__( self, other ):

        # Check if other object is of the same class
        if ( type( other ) != type( self ) ):
            return False

        # Check if the transcript ID and the source are the same
        elif ( ( self.transcript_id == other.transcript_id )
               and ( self.data_source == other.data_source ) ):
            return True

        else:
            return False

    ## __hash__
    #  --------
    #
    # Returns the hash value of a DSTranscript object.
    # The hash value of an instance is computed using its "primary key-like" attributes
    # (i.e. the attributes with unique constraint).
    #
    # @return the hash value of the DSTranscript object.
    #
    def __hash__( self ):

        return hash( ( self.transcript_id, self.data_source ) )

    ## update
    #  ------
    #
    # Try to combine two instances of this class when they are "equal"
    # (i.e. share the same values for their "primary key-like" attributes)
    # by completing the current object with the information of the other one.
    #
    # The gene ID is processed first. If the gene IDs of the two objects differ,
    # the list of conflicting gene IDs is completed and sorted, and, if this
    # list has changed, the gene ID of the current object is replaced by a
    # "conflict" ID made of the conflict prefix followed by the conflicting
    # gene IDs joined with underscores.
    #
    # Then, for each of the attributes raw_strand, raw_start_pos, raw_end_pos,
    # raw_cds_start_pos, raw_cds_stop_pos and rna_biotype:
    #   - If the current object is missing a value the other object has,
    #     the current object is updated with this value.
    #   - If both objects provide different values, the attribute of the current
    #     object is reset (to the 'ambiguous' flag for rna_biotype, to None
    #     for the other attributes) and the conflict is recorded.
    #
    # NB: When a list is provided as gene_id_conflicts, it is completed and
    #     sorted in place.
    #
    # @param other: DSTranscript - Another DSTranscript object "equal" to the current one.
    # @param gene_id_conflicts: List - The list of all gene IDs that have been found associated
    #                                  to the DSTranscript entry. None by default.
    #
    # @throw DenCellORFTrUpdateException: When different values are found for the same attributes
    #                                     of two objects assumed to be the same. When the conflict
    #                                     involves the gene ID, the exception also carries the new
    #                                     gene ID and the list of conflicting gene IDs.
    #
    def update( self, other, gene_id_conflicts=None ):

        # Non-primary-key attributes to complete / compare
        attributes_to_check = [ 'raw_strand', 'raw_start_pos', 'raw_end_pos',
                                'raw_cds_start_pos', 'raw_cds_stop_pos', 'rna_biotype' ]

        # List of the attributes for which the value should be set to
        # 'AMBIGUOUS' if they are found different
        att_to_flag_if_conflict = [ 'rna_biotype' ]

        # Initialize a dictionary of the attributes for which there is a conflict.
        # It associates to the name of the attribute a tuple
        # ( value in self, value in other, value eventually set in self ).
        conflicting_att = {}

        # Check the gene IDs. If the two gene IDs are different, create a new ID
        # by concatenating the names of the two genes.
        self_gene = self.gene_id
        other_gene = other.gene_id

        if ( self_gene != other_gene ):

            # Keep a COPY of the conflicts already registered. The list received
            # is completed and sorted in place below, so keeping a simple
            # reference to it would make the comparison performed afterwards
            # always succeed and new conflicting gene IDs would never be
            # registered.
            init_gene_id_conflicts = list( gene_id_conflicts ) if gene_id_conflicts else None

            # If a conflict has already been registered for this DSTranscript,
            # then complete the list of genes associated to it.
            # Otherwise, register this first conflict.
            if gene_id_conflicts:
                # Only add the gene IDs which do not start with the CONFLICT prefix
                # and which are not already registered
                if ( ( not self_gene.startswith( Constants.PREFIX_CONFLICT_GENE_TRANSCRIPT ) )
                     and ( self_gene not in gene_id_conflicts ) ):
                    gene_id_conflicts.append( self_gene )

                if ( ( not other_gene.startswith( Constants.PREFIX_CONFLICT_GENE_TRANSCRIPT ) )
                     and ( other_gene not in gene_id_conflicts ) ):
                    gene_id_conflicts.append( other_gene )

            else:
                # Register this new conflict
                gene_id_conflicts = [ self_gene, other_gene ]

            # Order the IDs in order to always get the same new ID when the same gene IDs are conflicting
            gene_id_conflicts.sort()

            # Register the new list of conflict on gene IDs only if it has been updated
            if ( gene_id_conflicts != init_gene_id_conflicts ):
                # Create a new "conflict" id
                new_gene_id = Constants.PREFIX_CONFLICT_GENE_TRANSCRIPT + '_'.join( gene_id_conflicts )
                self.gene_id = new_gene_id

                conflicting_att[ 'gene_id' ] = ( self_gene, other_gene, self.gene_id )

        # Check all the other non-primary-key attributes
        for att in attributes_to_check:

            self_att = getattr( self, att )
            other_att = getattr( other, att )

            # If the attribute treated is the raw strand, the '-' value
            # should not be considered as missing value
            if ( att == 'raw_strand' ):
                self_att_is_empty = GeneralUtil.is_empty( val = self_att,
                                                          empty_val = Constants.EMPTY_VALUES_WO_DASH )
                other_att_is_empty = GeneralUtil.is_empty( val = other_att,
                                                           empty_val = Constants.EMPTY_VALUES_WO_DASH )
            else:
                self_att_is_empty = GeneralUtil.is_empty( self_att )
                other_att_is_empty = GeneralUtil.is_empty( other_att )

            # If the other attribute is provided
            if ( not other_att_is_empty ):

                # and if the self attribute is empty, fill it using the information of the other object
                if self_att_is_empty:
                    setattr( self, att, other_att )

                # and if the self attribute is filled and the two values of the attribute are different,
                # reset the value and keep track of this attribute and its values
                elif ( ( not self_att_is_empty ) and ( other_att != self_att ) ):
                    if att in att_to_flag_if_conflict:
                        setattr( self, att, Constants.DENCELLORFOBJ_AMBIGUOUS_ATT )
                    else:
                        setattr( self, att, None )

                    conflicting_att[ att ] = ( self_att, other_att, getattr(self, att) )

        # If there are attributes for which there is a conflict,
        # raise a DenCellORFTrUpdateException
        if conflicting_att:

            except_message = []
            for (att, val) in conflicting_att.items():
                except_message.append( 'the attribute ' + att + ' has been found with the values "' +
                                       str( val[0] ) + '" and "' + str( val[1] ) + '",' +
                                       ' hence its value has been set to "' + str( val[2] ) + '"' )
            except_message = ', '.join( except_message ) + '.'

            # If a conflict has been found regarding the gene ID, raise the exception
            # with the value of the new gene ID to create and to associate to this transcript
            if ( 'gene_id' in conflicting_att ):
                raise DenCellORFTrUpdateException( message = except_message,
                                                   gene_id = self.gene_id,
                                                   gene_id_conflicts = gene_id_conflicts )
            else:
                raise DenCellORFTrUpdateException( message = except_message )
class DSORF(BaseDS):

    # Entry of the 'DSORF' table. An entry is identified either by its integer
    # id (primary key) or by the combination of attributes on which a unique
    # constraint is defined (data source, chromosome and "raw" coordinates /
    # splicing attributes).

    __tablename__ = 'DSORF'

    id = Column(Integer)
    data_source = Column(
        String(50,
               collation=SQLCollationManager.get_instance().get_db_collation()),
        ForeignKey('DataSource.name',
                   ondelete='CASCADE',
                   onupdate='CASCADE'))
    chromosome = Column(
        String(15,
               collation=SQLCollationManager.get_instance().get_db_collation()))
    raw_strand = Column(
        String(2,
               collation=SQLCollationManager.get_instance().get_db_collation()))
    raw_start_pos = Column(Integer)
    raw_stop_pos = Column(Integer)
    spliced = Column(Boolean)
    raw_splice_starts = Column(
        String(Constants.MAX_LEN_STRING,
               collation=SQLCollationManager.get_instance().get_db_collation()))
    raw_splice_ends = Column(
        String(Constants.MAX_LEN_STRING,
               collation=SQLCollationManager.get_instance().get_db_collation()))
    spliced_parts_count = Column(Integer)
    strand = Column(
        String(2,
               collation=SQLCollationManager.get_instance().get_db_collation()))
    start_pos = Column(Integer)
    stop_pos = Column(Integer)
    splice_starts = Column(
        String(Constants.MAX_LEN_STRING,
               collation=SQLCollationManager.get_instance().get_db_collation()))
    splice_ends = Column(
        String(Constants.MAX_LEN_STRING,
               collation=SQLCollationManager.get_instance().get_db_collation()))
    raw_genomic_length = Column(Integer)
    genomic_length = Column(Integer)
    genomic_length_diff = Column(Integer)
    liftover_succeed = Column(Boolean)

    # Primary key and unique constraint on the "primary key-like" attributes
    __table_args__ = (
        PrimaryKeyConstraint('id'),
        UniqueConstraint('data_source', 'chromosome', 'raw_strand',
                         'raw_start_pos', 'raw_stop_pos', 'spliced',
                         'raw_splice_starts', 'raw_splice_ends',
                         'spliced_parts_count'),
    )

    # One-to-many relationship (adds a 'DSORF' back-reference on the related class)
    DSORFTranscriptAsso_list = relationship('DSORFTranscriptAsso',
                                            cascade='all',
                                            backref='DSORF')

    ## __eq__
    #  ------
    #
    # Equality test between two DSORF instances.
    # Two instances are equal when they are exactly of the same class and
    # when all their "primary key-like" attributes (i.e. the attributes
    # with unique constraint) hold equal values.
    #
    # @param other: DSORF - The object to compare to this one.
    #
    # @return Boolean - True if the two objects are equal, False otherwise.
    #
    def __eq__(self, other):

        # Objects of a different class are never equal to this one
        if (type(other) != type(self)):
            return False

        # Compare the "primary key-like" attributes one after the other
        # and stop at the first one that differs
        for unique_att in ('raw_start_pos', 'raw_stop_pos', 'raw_strand',
                           'chromosome', 'spliced', 'raw_splice_starts',
                           'raw_splice_ends', 'spliced_parts_count',
                           'data_source'):
            if not (getattr(self, unique_att) == getattr(other, unique_att)):
                return False

        return True

    ## __hash__
    #  --------
    #
    # Hash value of a DSORF instance, computed from the tuple of its
    # "primary key-like" attributes (i.e. the attributes with unique constraint).
    #
    # @return the hash value of the DSORF object.
    #
    def __hash__(self):

        unique_values = (self.data_source,
                         self.chromosome,
                         self.raw_strand,
                         self.raw_start_pos,
                         self.raw_stop_pos,
                         self.spliced,
                         self.raw_splice_starts,
                         self.raw_splice_ends,
                         self.spliced_parts_count)
        return hash(unique_values)
try: # Set the level of verbosity of the main Logger verbosity_level = OptionManager.get_instance().get_option( OptionConstants.OPTION_VERBOSITY) if (verbosity_level not in Constants.LOG_MODES.keys()): Logger.get_instance().critical( 'The level of verbosity selected is incorrect.' + ' It must be one of: ' + ', '.join(Constants.LOG_MODES.keys()) + '. Please see the documentation for more information.') Logger.get_instance().set_mode(Constants.LOG_MODES[verbosity_level]) # Get the type of database, in order to set the appropriate collation for strings SQLCollationManager.get_instance().set_db_collation( OptionManager.get_instance().get_option( OptionConstants.OPTION_DB_TYPE)) # NB: The execution module is only imported after this step in order to allow a # "dynamic" creation of the model (classes inheriting from SQLAlchemy Base). # Indeed, once the model has been loaded, this is not possible to update the # collation of the columns. from fr.tagc.uorf.core.execution import * # Instantiate DenCellORF DenCellORF = DenCellORF() # Execute the strategy DenCellORF.execute() except Exception as e: Logger.get_instance().critical(