class CurationRequest(SVIPModel):
    """
    Represents a request for a specific variant + disease to be curated. These requests are either automatically
    generated by the system or created by clinicians. If the request is created by a clinician, it's considered a
    higher priority than one that the system generates.
    """
    # both links are nulled out (not cascaded) when the referenced row is deleted, so a request can outlive
    # the submission and/or variant it was created for
    submission = models.ForeignKey('SubmittedVariant', on_delete=models.SET_NULL, null=True)
    variant = models.ForeignKey(to=Variant, on_delete=models.SET_NULL, null=True)
    # the disease is mandatory (no null=True here)
    disease = ForeignKey(to=Disease, on_delete=DB_CASCADE)

    # there isn't always a requestor, e.g. in the case where it's system-generated or a curator created it themselves
    # if requestor is null, then this is a lower priority than one that was requested by a clinician
    requestor = models.TextField(null=True)

    created_on = models.DateTimeField(default=now, db_index=True)
    due_by = models.DateTimeField(db_index=True, null=True)

    # TODO: add method that determines the status of the request based on:
    #  1. whether curation entries exist (e.g., someone 'claimed' it)
    #  2. the progress of the reviews
    # statuses would evolve like so:
    #  -> submitted
    #  -> claimed (curation entry exists)
    #  -> under review (entries submitted for review)
    #  -> finalized (all reviews for all entries completed)

    class Meta:
        # NOTE(review): both columns are nullable; many databases treat NULLs as distinct in unique
        # constraints, so rows with a null submission or variant may not be deduplicated -- confirm
        unique_together = ('submission', 'variant')
class CollapsedAssociation(models.Model):
    """
    A simplified version of Association that collapses entries that have the same information except the source
    evidence ID and the list of publications, to circumvent denormalized entries in public databases.

    For example, CIViC has three entries for (BRAF, V600E, Cholangiocarcinoma) that only differ by the publications
    associated with the submission. They should instead be a single entry with three publications.

    Additionally, this model presents a schema that's closer to what the front-end expects, removing the need to
    transform and/or aggregate fields in the client (e.g., the contexts field, which is a concatenation of all the
    EnvironmentalContext.description values for this Association).

    Django does not manage this table (Meta.managed is False); per the comment on Meta it is created from a
    database view.
    """
    # DO_NOTHING: Django takes no action on this relation when the referenced VariantInSource is deleted
    variant_in_source = ForeignKey(to=VariantInSource, null=False, on_delete=models.DO_NOTHING)

    disease = models.TextField(blank=True, null=True)
    evidence_type = models.TextField(blank=True, null=True)
    evidence_direction = models.TextField(blank=True, null=True)
    clinical_significance = models.TextField(blank=True, null=True)
    drug_labels = models.TextField(blank=True, null=True)
    contexts = models.TextField(blank=True, null=True)
    evidence_levels = models.TextField(blank=True, null=True)
    publications = ArrayField(base_field=models.TextField(), blank=True, null=True, verbose_name="Publication URLs")
    # presumably the individual associations that were collapsed into this row -- verify against the view
    children = JSONField(blank=True, null=True)
    # presumably the number of collapsed associations -- verify against the view
    collapsed_count = models.BigIntegerField(blank=True, null=True)

    class Meta:
        managed = False  # Created from a view. Don't remove.
        db_table = 'api_collapsed_associations'
class Sample(SVIPModel):
    """
    A sample attached to a DiseaseInSVIP entry.

    Every descriptive attribute, including the ones whose names suggest dates or numbers (year_of_birth,
    sequencing_date, coverage, tumor_purity), is stored as free text; no parsing or validation happens here.
    """
    disease_in_svip = ForeignKey(to=DiseaseInSVIP, on_delete=DB_CASCADE)

    # sample identification and provenance
    sample_id = models.TextField(verbose_name="Sample ID")
    year_of_birth = models.TextField(verbose_name="Year of birth")
    gender = models.TextField(verbose_name="Gender")
    hospital = models.TextField(verbose_name="Hospital")
    medical_service = models.TextField(verbose_name="Medical service")
    provider_annotation = models.TextField(verbose_name="Provider annotation")

    # specimen description
    sample_tissue = models.TextField(verbose_name="Sample Tissue")
    tumor_purity = models.TextField(verbose_name="Tumor purity")
    tnm_stage = models.TextField(verbose_name="TNM stage")
    sample_type = models.TextField(verbose_name="Sample type")
    sample_site = models.TextField(verbose_name="Sample site")
    specimen_type = models.TextField(verbose_name="Specimen Type")

    # sequencing and analysis pipeline details
    sequencing_date = models.TextField(verbose_name="Sequencing date")
    panel = models.TextField(verbose_name="Panel")
    coverage = models.TextField(verbose_name="Coverage")
    calling_strategy = models.TextField(verbose_name="Calling strategy")
    caller = models.TextField(verbose_name="Caller")
    aligner = models.TextField(verbose_name="Aligner")
    software = models.TextField(verbose_name="Software")
    software_version = models.TextField(verbose_name="Software version")
    platform = models.TextField(verbose_name="Platform")

    contact = models.TextField(verbose_name="Contact")
class Phenotype(models.Model):
    """
    A phenotype attached to an Association. Every descriptive field is optional (nullable) text.
    """
    association = ForeignKey(to=Association, on_delete=DB_CASCADE)

    source = models.TextField(null=True)
    term = models.TextField(null=True)
    pheno_id = models.TextField(null=True)  # just called 'id' in the original object
    family = models.TextField(null=True)
    description = models.TextField(null=True)
class EnvironmentalContext(models.Model):
    """
    An environmental context attached to an Association. Unlike the other text fields on this model,
    description is declared without null=True.
    """
    association = ForeignKey(to=Association, on_delete=DB_CASCADE)

    source = models.TextField(null=True)
    term = models.TextField(null=True)
    envcontext_id = models.TextField(null=True)  # just called 'id' in the original object
    usan_stem = models.TextField(null=True)
    description = models.TextField()
class DiseaseInSVIP(SVIPModel):
    """
    Ties a disease to a VariantInSVIP entry, along with a status and a score for that pairing.
    """
    svip_variant = ForeignKey(to=VariantInSVIP, on_delete=DB_CASCADE)

    # a null disease means that the samples or curation entries associated with this variant belong to an
    # unknown or unspecified disease.
    disease = ForeignKey(to=Disease, null=True, on_delete=DB_CASCADE)

    status = models.TextField(default="Loaded")
    score = models.IntegerField(default=0)

    objects = DiseaseInSVIPManager()

    class Meta(SVIPModel.Meta):
        verbose_name = "Disease in SVIP"
        verbose_name_plural = "Diseases in SVIP"
        unique_together = ('svip_variant', 'disease')

    def name(self):
        """
        Returns the linked disease's name, or "Unspecified" when no disease is linked.
        """
        if self.disease:
            return self.disease.name
        return "Unspecified"
class Evidence(models.Model): association = ForeignKey(to=Association, on_delete=DB_CASCADE) # originally under association.evidence[].info.publications[] publications = ArrayField(base_field=models.TextField(), null=True, verbose_name="Publication URLs") # originally under association.evidence[].evidenceType.sourceName evidenceType_sourceName = models.TextField(null=True) # originally under association.evidence[].evidenceType.id evidenceType_id = models.TextField(null=True)
class AppliedCoupon(models.Model):
    """
    Records that a given user has applied a given coupon code.
    """
    user = ForeignKey(User, DB_CASCADE)
    coupon_code = models.CharField(max_length=255)

    class Meta:
        db_table = 'applied_coupon'

    def __str__(self):
        # the trailing space is part of the existing representation
        email = self.user.email
        code = self.coupon_code
        return f"{email} - {code} "
class Mechanic(models.Model):
    """
    A mechanic known to the application: a unique mechanic code tied to a user account.
    """
    mechanic_code = models.CharField(max_length=100, null=False, unique=True)
    user = ForeignKey(User, DB_CASCADE)

    class Meta:
        db_table = 'mechanic'

    def __str__(self):
        code = self.mechanic_code
        return f"<Mechanic: {code}>"
class Order(models.Model):
    """
    An order placed by a user for some quantity of a product, with a delivery/return status.
    """
    # allowed values for `status`; new orders default to DELIVERED
    STATUS_CHOICES = Choices(
        ("DELIVERED", "delivered", "delivered"),
        ("RETURN_PENDING", "return pending", "return pending"),
        ("RETURNED", "returned", "returned"),
    )

    user = ForeignKey(User, DB_CASCADE)
    product = ForeignKey(Product, DB_CASCADE)
    quantity = models.IntegerField(default=1)
    created_on = models.DateTimeField()
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default=STATUS_CHOICES.DELIVERED,
    )

    class Meta:
        db_table = 'order'

    def __str__(self):
        # the trailing space is part of the existing representation
        buyer_email = self.user.email
        product_name = self.product.name
        return f"{buyer_email} - {product_name} "
class ServiceRequest(models.Model):
    """
    A request for a mechanic to service a vehicle, tracked as either pending or finished.
    """
    # allowed values for `status`; new requests default to PEN (pending)
    STATUS_CHOICES = Choices(
        ('PEN', "Pending", "Pending"),
        ('FIN', "Finished", "Finished"),
    )

    mechanic = ForeignKey(Mechanic, DB_CASCADE)
    vehicle = ForeignKey(Vehicle, DB_CASCADE)
    problem_details = models.CharField(max_length=500, blank=True)
    created_on = models.DateTimeField()
    updated_on = models.DateTimeField(null=True)
    status = models.CharField(
        max_length=10,
        choices=STATUS_CHOICES,
        default=STATUS_CHOICES.PEN,
    )

    class Meta:
        db_table = 'service_request'

    def __str__(self):
        request_id = self.id
        return f'<ServiceRequest: {request_id}>'
class CurationReview(SVIPModel):
    """
    An assignment of a curation entry to a reviewer. Typically 3 reviewers are selected, each of whom provides a
    review. Once all votes are in:
    - If all three pass, the curation entry is considered 'reviewed' and finalized.
    - If any reject, the curation entry is returned to the 'saved' status(?) for the curator to fix and resubmit
      or abandon.

    None of the voting logic described above lives in this class; it only stores the assignment and its status.
    """
    curation_entry = ForeignKey(to=CurationEntry, on_delete=DB_CASCADE)
    # NOTE(review): this uses models.ForeignKey while the line above uses the bare ForeignKey name; both are
    # given DB_CASCADE -- confirm DB_CASCADE is valid for the stock Django field
    reviewer = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=DB_CASCADE)

    created_on = models.DateTimeField(default=now, db_index=True)
    # auto_now: refreshed on every save()
    last_modified = models.DateTimeField(auto_now=True, db_index=True)

    # allowed values come from the REVIEW_STATUS mapping (defined outside this block); presumably it
    # contains the 'pending' default -- verify
    status = models.TextField(verbose_name="Review Status",
                              choices=tuple(REVIEW_STATUS.items()),
                              default='pending',
                              db_index=True)
class Association(models.Model):
    """
    Represents the connection between a Variant and its Phenotypes, Evidences, and EnvironmentalContexts for a
    specific source. There is one Association per pairing of variant, (sets of) phenotypes, and (sets of) contexts.
    """
    variant_in_source = ForeignKey(to=VariantInSource, on_delete=DB_CASCADE, null=False)

    # FIXME: move this to VariantInSource
    source_url = models.TextField(null=True, verbose_name="URL of the variant's entry in the source")
    source = models.TextField()

    # temporary field to log exactly what we're getting from the server
    payload = JSONField(default=dict)

    description = models.TextField(null=True)
    drug_labels = models.TextField(null=True)
    drug_interaction_type = models.TextField(null=True)

    variant_name = models.TextField(null=True)  # here for debugging, remove if it's always the name as Variant__name
    source_link = models.TextField(null=True, verbose_name="URL of the association's entry in the source")

    # these are modeled after CIViC, with the other sources' ratings shoehorned into these fields
    # (evidence_type is the only one restricted to a choices list, EVIDENCE_TYPES, defined outside this block)
    evidence_type = models.TextField(null=True, choices=EVIDENCE_TYPES)
    evidence_direction = models.TextField(null=True)
    clinical_significance = models.TextField(null=True)
    evidence_level = models.TextField(null=True)

    # stores errors, exceptions encountered while crawling
    crawl_status = JSONField(null=True)

    # stores extra data that doesn't fit into the association structure
    extras = JSONField(null=True, verbose_name="Association-specific extra data")
class SubmittedVariant(SVIPModel):
    """
    A description of a variant that, once processed by the SVIP harvester, will become a variant. The metadata for
    that variant will be retrieved via VEP.
    """
    SUBMITTED_VAR_STATUS = OrderedDict((
        ('pending', 'pending'),
        ('completed', 'completed'),
        ('error', 'error'),
    ))

    # NOTE(review): the autosome keys are ints (1..22) while `chromosome` is a TextField -- confirm that
    # validation/serialization of these choices behaves as intended
    SUBMITTED_VAR_CHROMOSOME = tuple(
        (label, label) for label in [*range(1, 23), "X", "Y", "MT"]
    )

    owner = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.SET_NULL, null=True)
    created_on = models.DateTimeField(default=now, db_index=True)

    chromosome = models.TextField(choices=SUBMITTED_VAR_CHROMOSOME)
    pos = models.IntegerField()
    ref = models.TextField()
    alt = models.TextField()

    canonical_only = models.BooleanField(
        default=False,
        help_text='If true, only retain VEP variants marked canonical',
    )

    # an optional batch from which this variant originates
    batch = ForeignKey(SubmittedVariantBatch, on_delete=models.SET_NULL, null=True)

    status = models.TextField(
        verbose_name="Processing Status",
        choices=tuple(SUBMITTED_VAR_STATUS.items()),
        default='pending',
        db_index=True,
    )
    processed_on = models.DateTimeField(db_index=True, null=True)
    error_msg = models.TextField(null=True)

    # the set of variants that were harvested from VEP for this submission
    # (there can be more than one, since VEP can return multiple results for a single chrom/pos/ref/alt set)
    resulting_variants = ArrayField(base_field=models.IntegerField(), null=True)

    for_curation_request = models.BooleanField(
        default=False,
        help_text='If true, a curation request should be created when this submission is completed',
    )
    curation_disease = models.ForeignKey(
        Disease,
        on_delete=models.SET_NULL,
        null=True,
        help_text='If for_curation_request is true, identifies the disease to which the new curation request should be associated',
    )
    requestor = models.TextField(
        null=True,
        help_text='If for_curation_request is true, identifies who asked for this variant to be submitted',
    )

    objects = SubmittedVariantManager()

    @property
    def icdo_morpho(self):
        """The curation disease's icd_o_morpho value, or None when no curation disease is set."""
        disease = self.curation_disease
        if not disease:
            return None
        return disease.icd_o_morpho

    @property
    def icdo_topo(self):
        """The icd_o_topo of every topography row tied to the curation disease, or None without a disease."""
        disease = self.curation_disease
        if not disease:
            return None
        return [topo.icd_o_topo for topo in disease.icdotopoapidisease_set.all()]

    def owner_name(self):
        """
        Returns a display name for the owner: "N/A" without an owner, otherwise "first last" (stripped),
        falling back to the username when both name parts are blank.
        """
        owner = self.owner
        if not owner:
            return "N/A"
        full_name = f"{owner.first_name} {owner.last_name}".strip()
        return full_name or owner.username

    def as_vcf_row(self):
        """
        Renders this submission as one tab-separated, newline-terminated VCF data row.
        """
        # CHROM POS ID REF ALT QUAL FILTER INFO
        # QUAL and INFO are both '.', which indicates an empty field(?)

        # `alt` may look like a stringified list (e.g. "[A, None]"): drop the brackets, turn "None" into
        # the '.' placeholder and trim the whitespace around each allele
        alleles = self.alt.strip("[]").replace("None", ".").split(",")
        alt_column = ",".join(allele.strip() for allele in alleles)

        columns = (
            self.chromosome, self.pos, self.id, self.ref,
            alt_column, '.', 'PASS', '.',
        )
        return "\t".join(map(str, columns)) + "\n"
class CurationEntry(SVIPModel):
    """
    Represents a curation entry generated by a curator. Generally, these are in response to a request, although
    (apparently?) a curator can create a curation entry whenever they want.
    """
    disease = ForeignKey(to=Disease, on_delete=models.SET_NULL, null=True, blank=True)

    # variants = models.ManyToManyField(to=Variant)
    # NOTE(review): models.ForeignKey is combined with DB_CASCADE here, unlike the bare ForeignKey used
    # for `disease` and `request` -- confirm that pairing is intended
    variant = models.ForeignKey(to=Variant, on_delete=DB_CASCADE)
    extra_variants = models.ManyToManyField(
        to=Variant,
        through='VariantCuration',
        related_name='variants_new',
    )

    type_of_evidence = models.TextField(verbose_name="Type of evidence", null=True)
    drugs = ArrayField(base_field=models.TextField(), verbose_name="Drugs", null=True, blank=True)
    interactions = ArrayField(base_field=models.TextField(), verbose_name="Interactions", null=True, blank=True)
    effect = models.TextField(verbose_name="Effect", null=True)
    tier_level_criteria = models.TextField(verbose_name="Tier level Criteria", null=True)
    tier_level = models.TextField(verbose_name="Tier level", null=True)
    mutation_origin = models.TextField(
        verbose_name="Mutation Origin",
        default="Somatic",
        null=True,
        blank=True,
    )
    associated_mendelian_diseases = models.TextField(
        verbose_name="Associated Mendelian Disease(s)",
        null=True,
        blank=True,
    )
    summary = models.TextField(verbose_name="Complementary information", null=True, blank=True)
    support = models.TextField(verbose_name="Support", null=True, blank=True)
    comment = models.TextField(verbose_name="Comment", null=True, blank=True)
    references = models.TextField(verbose_name="References", null=True)
    annotations = ArrayField(base_field=models.TextField(), null=True)

    created_on = models.DateTimeField(default=now, db_index=True)
    last_modified = models.DateTimeField(auto_now=True, db_index=True)

    owner = models.ForeignKey(settings.AUTH_USER_MODEL, null=True, on_delete=models.SET_NULL)
    status = models.TextField(
        verbose_name="Curation Status",
        choices=tuple(CURATION_STATUS.items()),
        default='draft',
        db_index=True,
    )

    # optionally, this curation entry could be the result of "claiming" a curation request
    # FIXME: should we always generate a curation request to make tracking its review easier?
    # FIXME #2: assumedly batches of curation entries are submitted for a single request, so this should be mandatory
    request = ForeignKey(to=CurationRequest, on_delete=DB_CASCADE, null=True, blank=True)

    # FIXME: should we also track the review status's in the entry's history? is that even possible?
    # it might be: https://django-simple-history.readthedocs.io/en/latest/historical_model.html#adding-additional-fields-to-historical-models
    history = HistoricalRecords(
        excluded_fields=('created_on', 'last_modified'),
        cascade_delete_history=True,
    )

    objects = CurationEntryManager()

    class Meta:
        verbose_name = "Curation Entry"
        verbose_name_plural = "Curation Entries"

    @property
    def icdo_morpho(self):
        """The disease's icd_o_morpho value, or None when this entry has no disease."""
        disease = self.disease
        if not disease:
            return None
        return disease.icd_o_morpho

    @property
    def icdo_topo(self):
        """The icd_o_topo of every topography row tied to the disease, or None when there is no disease."""
        disease = self.disease
        if not disease:
            return None
        return [topo.icd_o_topo for topo in disease.icdotopoapidisease_set.all()]

    def owner_name(self):
        """
        Returns a display name for the owner: "N/A" without an owner, otherwise "first last" (stripped),
        falling back to the username when both name parts are blank.
        """
        owner = self.owner
        if not owner:
            return "N/A"
        full_name = f"{owner.first_name} {owner.last_name}".strip()
        return full_name or owner.username

    def ensure_svip_provenance(self):
        """
        Ensures that a VariantInSVIP exists for the variants/extra variants mentioned in this entry, and that
        a DiseaseInSVIP exists for the variant+disease. This includes the case in which the disease is null,
        in which case an "Unspecified" disease is associated with the VariantInSVIP.

        :return: a list with one dict per variant (main variant first, then the extra variants):
            [{variant: variant, svip: (entry, created), diseaseinsvip: (entry, created)}, ...]
        """
        provenance = []

        for variant in chain([self.variant], self.extra_variants.all()):
            # get_or_create hands back an (object, created) pair, which is stored as-is below
            svip_result = VariantInSVIP.objects.get_or_create(variant=variant)
            disease_result = DiseaseInSVIP.objects.get_or_create(
                svip_variant=svip_result[0],
                disease=self.disease,
            )
            provenance.append({
                'variant': variant,
                'svip': svip_result,
                'diseaseinsvip': disease_result,
            })

        return provenance
class Variant(models.Model):
    """
    A variant of a gene: its naming, sequence-ontology classification, genomic position/change, HGVS
    notations and references to external resources.
    """
    gene = ForeignKey(to=Gene, on_delete=DB_CASCADE)
    name = models.TextField(null=False, db_index=True, verbose_name="Variant Name")
    description = models.TextField(null=True, db_index=True)

    biomarker_type = models.TextField(null=True)
    so_hierarchy = ArrayField(base_field=models.CharField(max_length=15),
                              null=True,
                              verbose_name="Hierarchy of SO IDs")
    soid = models.TextField(null=True, verbose_name="Sequence Ontology ID", default="")
    so_name = models.TextField(null=True, db_index=True)

    reference_name = models.TextField(null=True)  # e.g., GRCh37
    refseq = models.TextField(null=True)  # an NCBI refseq id
    isoform = models.TextField(null=True)  # not clear, but typically an ensembl transcript accession ID

    # position and change data
    chromosome = models.CharField(max_length=10, null=True)
    start_pos = models.IntegerField(null=True)
    end_pos = models.IntegerField(null=True)
    ref = models.TextField(null=True)
    alt = models.TextField(null=True)

    hgvs_g = models.TextField(null=True, db_index=True)
    hgvs_c = models.TextField(null=True, db_index=True)
    hgvs_p = models.TextField(null=True, db_index=True)

    somatic_status = models.TextField(null=True)

    # external references
    # NOTE(review): the label says "UniProt IDs" but the field is dbsnp_ids -- looks like a copy/paste
    # slip; left untouched here because changing verbose_name alters migration state
    dbsnp_ids = ArrayField(base_field=models.TextField(), null=True, verbose_name="UniProt IDs")
    # NOTE(review): the leading '=' in this label looks accidental -- confirm before changing
    myvariant_hg19 = models.TextField(null=True, verbose_name="=MyVariant.info URL (hg19)")
    mv_info = JSONField(null=True)  # optional info pulled from myvariant.info; see normalizers.myvariant_enricher

    # we should be able to compute this from api_source
    sources = ArrayField(base_field=models.TextField(), null=True, verbose_name="Sources")

    # stores errors, exceptions encountered while crawling
    crawl_status = JSONField(null=True)

    objects = VariantManager()

    def __str__(self):
        """
        Returns the variant's description, or its name when the description is NULL.

        `description` is nullable, and __str__ must return a str (returning None raises TypeError), so
        the non-null `name` field is used as the fallback.
        """
        if self.description is not None:
            return self.description
        return self.name

    def gene_symbol(self):
        """Returns the symbol of the gene this variant belongs to."""
        return self.gene.symbol

    def in_svip(self):
        """
        Returns True when a related `variantinsvip` object is reachable from this variant and is not None.
        """
        # hasattr() is False when accessing the related object raises AttributeError
        return hasattr(self, 'variantinsvip') and self.variantinsvip is not None

    def natural_key(self):
        """Returns the (description, hgvs_g) pair used as this variant's natural key."""
        return self.description, self.hgvs_g

    class Meta:
        indexes = [
            models.Index(fields=['gene', 'name']),
            models.Index(fields=['gene', 'name', 'hgvs_c']),
        ]
class VariantInSource(models.Model):
    """
    Represents the entry that a specific source (say, CIViC) has for a variant. This includes source-specific variant-
    level information, such as the link to the variant in that source.

    The methods below are aggregations over this entry's associations; per their comments, several of them
    feed specific front-end panels.
    """
    variant = ForeignKey(to=Variant, on_delete=DB_CASCADE, db_index=True)
    source = ForeignKey(to=Source, on_delete=DB_CASCADE, db_index=True)

    variant_url = models.TextField(null=True, verbose_name="Variant's URL for this source")

    # this holds source-specific values like COSMIC's variant-level FATHMM predictions/scores
    extras = JSONField(null=True, verbose_name="Source-specific extra data")

    class Meta:
        unique_together = (("variant", "source"), )

    # TODO: add link to variant in that source (i.e, move it from Association to here)
    # TODO: add aggregations that the front-end is currently computing on its own, specifically:
    #  diseases: [],
    #  database_evidences: [], (this is actually just used to count the number of evidence items)
    #  (review val h.'s request to count the number of evidence item PMIDs, not sure it makes sense)
    #  clinical: [], (this is actually the association object, but it does a lot of client-side stuff, so we won't replicate it here)
    #  scores: []

    def association_count(self) -> int:
        """Returns the number of associations attached to this entry."""
        return self.association_set.count()

    def publication_count(self, is_distinct: bool = False) -> int:
        """
        Counts the publication values obtained by unnesting evidence__publications across this entry's
        associations.

        :param is_distinct: when True, each distinct publication value is counted only once
        """
        # one row per element of each evidence's publications array (SQL unnest)
        q = (self.association_set.annotate(
            q=Func(F('evidence__publications'), function='unnest')).values_list('q', flat=True))
        return q.count() if not is_distinct else q.distinct().count()

    def diseases(self):
        """
        Returns {disease, count} rows, one per phenotype term, ordered by descending count. For the
        'clinvar' source the count is the sum of extras.num_submissions; otherwise it is a count of
        phenotype terms.
        """
        # FIXME: this sucks, but clinvar right now is pre-aggregated by disease, so we need another
        #  way to surface the real submission count (which is held in extras.num_submissions)
        # this is used to render the 'disease' filtering side tables in the third-party evidence panel
        if self.source.name == 'clinvar':
            return (self.association_set.values(
                disease=F('phenotype__term')).annotate(
                    count=Sum(Cast(KeyTextTransform('num_submissions', 'extras'), IntegerField()))
                ).distinct().order_by('-count'))

        return (self.association_set.values(
            disease=F('phenotype__term')).annotate(
                count=Count('phenotype__term')).distinct().order_by('-count'))

    def diseases_collapsed(self):
        """
        Returns {disease, count} rows computed over the collapsed associations, ordered by descending count.
        """
        # this is used to render the 'disease' filtering side tables in specifically civic's third-party evidence panel
        # (or whichever third-party evidence lists use collapsed associations rather than normal ones)
        return (self.collapsedassociation_set.values('disease').annotate(
            count=Count('disease')).distinct().order_by('-count'))

    def contexts(self):
        """
        Returns {context, count} rows, one per environmental-context description (rows with a null
        context are excluded), ordered by descending count.
        """
        # this is used to render the 'tissues' filtering side tables in specifically cosmic's third-party evidence panel
        return (self.association_set.values(
            context=F('environmentalcontext__description')).exclude(
                context__isnull=True).annotate(
                    count=Count('environmentalcontext__description')).distinct().order_by('-count'))

    def diseases_contexts(self):
        """
        Returns a generator that yields one single-key dict per disease, {disease: {context: count, ...}}.
        """
        # this is used to render the 'diseases/tissues' visualization on the variant details page
        # NOTE(review): both conditions sit in a single exclude() call; confirm whether the intent is to
        # drop rows where both are null or rows where either is null
        pairs = (self.association_set.values(
            disease=F('phenotype__term'),
            context=F('environmentalcontext__description')).exclude(
                disease__isnull=True, context__isnull=True).annotate(
                    count=Count('environmentalcontext__description')).distinct().order_by('disease'))

        # groupby only merges adjacent rows, hence the order_by('disease') above
        return ({
            x[0]: {y['context']: y['count'] for y in x[1]}
        } for x in itertools.groupby(pairs, lambda x: x['disease']))

    def clinical_significances(self):
        """
        Returns {name, count} rows per clinical significance, with nulls reported as 'N/A', ordered by
        descending count.
        """
        return (self.association_set.values(
            name=Coalesce('clinical_significance', Value('N/A'))).annotate(
                count=Count(Coalesce('clinical_significance', Value('N/A')))).distinct().order_by('-count'))

    def evidence_types(self):
        """
        Returns one row per evidence type found on this entry's associations, with keys name, count and
        subsigs. For the 'Predisposing' and 'Functional' types, subsigs is an array of per-clinical-significance
        {name, count} objects; for every other type the CASE expression has no ELSE branch, so it is NULL.
        """
        # earlier ORM-only version, which has no subsigs breakdown:
        # return (
        #     self.association_set
        #     .values(name=F('evidence_type'))
        #     .annotate(count=Count('evidence_type'))
        #     .distinct().order_by('-count')
        # )

        with connection.cursor() as cursor:
            # self.id is passed as a bound parameter, not interpolated into the SQL text
            cursor.execute(
                """
                select
                    aa.evidence_type as name, count(*) as count,
                    case when evidence_type='Predisposing' or evidence_type='Functional' then array(
                        select row_to_json(r.*) from (
                            select as_in.clinical_significance as name, count(*) as count
                            from api_association as_in
                            where as_in.id=any(array_agg(aa.id))
                            group by as_in.clinical_significance
                        ) r
                    ) END as subsigs
                from api_variantinsource vis
                inner join api_variant av on vis.variant_id = av.id
                inner join api_association aa on vis.id = aa.variant_in_source_id
                where vis.id=%s
                group by aa.evidence_type;
                """, [self.id])

            # dictfetchall is defined outside this block; presumably it returns each row as a dict -- verify
            return dictfetchall(cursor)

    def scores(self):
        """
        Returns a dict mapping each evidence level (nulls reported as 'n/a') to the number of associations
        at that level.
        """
        return dict(
            self.association_set.values(
                score=Coalesce('evidence_level', Value('n/a'))).annotate(
                    count=Count(Coalesce('evidence_level', Value('n/a')))
                ).distinct().values_list('score', 'count'))