def parse_mhc_allele(self, allele: str, pattern=H2_ALLELE_PATTERN) -> MhcAllele:
    """
    Parses an H2 allele name into a normalized MhcAllele.

    Names matching H2_NETMHCPAN_ALLELE_PATTERN are first rewritten as
    "H2<gene><protein>" and then validated against ``pattern``. An allele that
    is well formed but missing from ``self.mhc_database`` only triggers a
    warning, it is still returned.

    :param allele: the allele name to parse
    :param pattern: compiled regular expression used to validate the allele; it
        must capture the gene in group 1 and the protein in group 2. Defaults
        to H2_ALLELE_PATTERN.
    :return: an MhcAllele with gene, protein, name and full_name set
    :raises NeofoxDataValidationException: if the allele is empty or does not
        match ``pattern``
    """
    netmhcpan_match = H2_NETMHCPAN_ALLELE_PATTERN.match(allele)
    if netmhcpan_match:
        # this ensures that netmhcpan output is normalized
        allele = "H2{gene}{protein}".format(
            gene=netmhcpan_match.group(1), protein=netmhcpan_match.group(2))

    # validates with the pattern received as argument instead of always using the
    # module constant, otherwise a pattern provided by the caller is silently ignored
    match = pattern.match(allele)
    if match is None:
        raise NeofoxDataValidationException(
            "Allele does not match H2 allele pattern {}".format(allele) if allele != "" else
            "Please check the format of provided alleles. An empty allele is provided")

    gene = match.group(1)
    protein = match.group(2)

    # controls for existence in the H2 database and warns the user
    mhc_allele = MhcAllele(gene=gene, protein=protein)
    if not self.mhc_database.exists(mhc_allele):
        logger.warning("Allele {} does not exist in the H2 database".format(allele))

    # builds a normalized representation of the allele
    name = "{gene}{protein}".format(gene=gene, protein=protein)

    # full name is the same as name in this case as the pattern does not allow variability
    mhc_allele.name = name
    mhc_allele.full_name = name
    return mhc_allele
def parse_mhc_allele(self, allele: str) -> MhcAllele:
    """
    Parses an HLA allele name into a normalized MhcAllele.

    The allele is first matched against HLA_ALLELE_PATTERN_WITHOUT_SEPARATOR. In
    that representation the boundary between group and protein digits is
    ambiguous, so ``self.mhc_database`` is used to choose between two candidate
    splits. If that pattern does not match, HLA_ALLELE_PATTERN is used instead.
    An allele that is well formed but missing from the database only triggers a
    warning, it is still returned.

    :param allele: the allele name to parse
    :return: an MhcAllele where ``name`` is "HLA-<gene>*<group>:<protein>" and
        ``full_name`` additionally carries any further fields captured by the
        pattern (groups 4 and 5, colon separated) and the expression change
        suffix (group 6)
    :raises NeofoxDataValidationException: if the allele is empty or matches
        neither pattern
    """
    match = HLA_ALLELE_PATTERN_WITHOUT_SEPARATOR.match(allele)
    if match is not None:
        # allele without separator, controls for ambiguities
        gene = match.group(1)
        group = match.group(2)
        protein = match.group(3)
        default_allele_exists = self.mhc_database.exists(
            MhcAllele(gene=gene, group=group, protein=protein))
        # if default allele does not exist, tries the alternative where the last digit
        # of the group belongs to the protein. This is only attempted when the group
        # has more than two digits, otherwise the group would be left with fewer than
        # the two digits that HLA nomenclature requires (eg: "HLA-A*0:299")
        if not default_allele_exists and len(group) > 2:
            protein = group[-1:] + protein
            group = group[0:-1]
    else:
        # infers gene, group and protein from the name
        match = HLA_ALLELE_PATTERN.match(allele)
        if match is None:
            raise NeofoxDataValidationException(
                "Allele does not match HLA allele pattern {}".format(allele) if allele != "" else
                "Please check the format of provided alleles. An empty allele is provided")
        gene = match.group(1)
        group = match.group(2)
        protein = match.group(3)

    # controls for existence in the HLA database and warns the user
    mhc_allele = MhcAllele(gene=gene, group=group, protein=protein)
    if not self.mhc_database.exists(mhc_allele):
        logger.warning("Allele {} does not exist in the HLA database".format(allele))

    # builds a normalized representation of the allele
    name = "HLA-{gene}*{serotype}:{protein}".format(gene=gene, serotype=group, protein=protein)

    # ensures that full name stores the complete allele as provided but normalizes
    # its representation
    # NOTE: whichever pattern matched is expected to define groups 4 to 6
    full_name = name
    for additional_field in (match.group(4), match.group(5)):
        # optional fields are either None or empty when absent
        if additional_field:
            full_name = full_name + ":{}".format(additional_field)
    expression_change = match.group(6)
    if expression_change:
        full_name = full_name + expression_change

    mhc_allele.name = name
    mhc_allele.full_name = full_name
    return mhc_allele