def get_genetic_test(cols, gene):
    """
    Build the genetic test for a single gene from a CanRisk pedigree record.

    @param cols: columns of one pedigree file line.
    @param gene: gene name used to look up the column index.
    @return: GeneticTest built from the first two ':'-separated parts of the column.
    @raise PedigreeError: if there is no column for the gene, or the column value
    does not contain two ':'-separated parts.
    """
    idx = CanRiskPedigree.get_column_idx(gene)
    if idx < 0:
        # Closing quote and space were missing from this message.
        raise PedigreeError("Genetic test column for '" + gene + "' not found.")

    gt = cols[idx].split(':')
    if len(gt) < 2:
        # Report malformed input as a pedigree error rather than a bare IndexError.
        raise PedigreeError(
            "Genetic test for '" + gene + "' has been set to '" + cols[idx] +
            "'. It must be specified as two values separated by ':'.")
    return GeneticTest(gt[0], gt[1])
def get_genetic_test(cols, gene):
    """
    Build the genetic test for a single gene from a CanRisk pedigree record.

    NOTE: file_type is not a parameter of this function; it is read from the
    enclosing scope (presumably the surrounding factory's file_type - confirm).

    @param cols: columns of one pedigree file line.
    @param gene: gene name used to look up the column index for the file type.
    @return: GeneticTest built from the first two ':'-separated parts of the column;
    a default GeneticTest() when the gene is "BARD1" and the file type is "canrisk1"
    and no column was found.
    @raise PedigreeError: if there is no column for the gene (other than the BARD1
    case above), or the column value does not contain two ':'-separated parts.
    """
    idx = CanRiskPedigree.get_column_idx(gene, file_type)
    if idx < 0:
        # A missing BARD1 column is tolerated for "canrisk1" files only.
        if gene == "BARD1" and file_type == "canrisk1":
            return GeneticTest()
        # Closing quote and space were missing from this message.
        raise PedigreeError("Genetic test column for '" + gene + "' not found.")

    gt = cols[idx].split(':')
    if len(gt) < 2:
        # Report malformed input as a pedigree error rather than a bare IndexError.
        raise PedigreeError(
            "Genetic test for '" + gene + "' has been set to '" + cols[idx] +
            "'. It must be specified as two values separated by ':'.")
    return GeneticTest(gt[0], gt[1])
def validate(self):
    """
    Validation check for pedigree input.

    Checks, in order: the family ID format, that all family members are
    connected to the target, the target's year of birth and age, that carrier
    probabilities or cancer risks can be computed for the target, and the
    consistency of monozygotic (MZ) twin data.

    @raise PedigreeError: on the first check that fails.
    """
    famid = self.famid
    if (len(famid) > settings.MAX_LENGTH_PEDIGREE_NUMBER_STR or
            not REGEX_ALPHANUM_HYPHENS.match(famid) or  # must be alphanumeric plus hyphen
            REGEX_ONLY_HYPHENS.match(famid) or          # but not just hyphens
            REGEX_ONLY_ZEROS.match(famid)):             # and not just zeros
        raise PedigreeError(
            "Family ID (1st data column) has been set to '" + famid +
            "'. Family IDs must be specified with between 1 and " +
            str(settings.MAX_LENGTH_PEDIGREE_NUMBER_STR) +
            " non-zero number or alphanumeric characters.")

    unconnected = self.unconnected()
    if len(unconnected) > 0:
        raise PedigreeError("Pedigree (" + famid + ") family members are not physically " +
                            "connected to the target: " + str(unconnected))

    # Check that the index's parameters are valid
    target = self.get_target()
    if target.yob == '0':
        raise PedigreeError(
            "The target's year of birth has been set to '" + target.yob +
            "'. This person must be assigned a valid year of birth.")
    if target.age == '0':
        raise PedigreeError("The target's age has been set to '" + target.age +
                            "'. This person must be assigned an age.")

    # Check that carrier probabilities / cancer risks can be computed
    carrier_probs = self.is_carrier_probs_viable(target=target)
    cancer_risks = self.is_risks_calc_viable(target=target)
    if not carrier_probs and not cancer_risks:
        raise PedigreeError(
            "BOADICEA cannot compute mutation carrier probabilities because the target '" + target.pid +
            "' has a positive genetic test. Also BOADICEA cannot compute breast and ovarian cancer "
            "risks because the target is: (1) over " + str(settings.MAX_AGE_FOR_RISK_CALCS) +
            " years old or (2) male, or (3) an affected female who has developed contralateral "
            "breast cancer, ovarian cancer or pancreatic cancer.")

    # Check monozygotic (MZ) twin data
    twin_store = self.get_twins()
    for t in twin_store:
        twins = twin_store[t]

        # Check that MZ siblings are only specified as twins, no identical triplets etc
        if len(twins) != 2:
            # Report the twin identifier itself (the key), not an individual's ID.
            raise PedigreeError(
                "MZ twin identifier '" + str(t) + "' does not appear twice in the pedigree file. "
                "Only MZ twins are permitted in the pedigree, MZ triplets or quads are not allowed.")

        # Check MZ twin characters are valid
        if len(t) != 1 or t not in settings.UNIQUE_TWIN_IDS:
            raise PedigreeError(
                "Invalid MZ twin character '" + t + "'. MZ twins must be identified using one " +
                "of the following ASCII characters: " + str(settings.UNIQUE_TWIN_IDS) + ".")

        first, second = twins[0], twins[1]

        # Check that monozygotic (MZ) twin data are consistent
        if first.mothid != second.mothid or first.fathid != second.fathid:
            raise PedigreeError(
                "Monozygotic (MZ) twins identified with the character '" + t + "' have different "
                "parents. MZ twins must have the same parents.")

        if first.yob != second.yob:
            raise PedigreeError(
                "Monozygotic (MZ) twins identified with the character '" + t + "' have different "
                "years of birth. MZ twins must have the same year of birth.")

        # Check that living MZ twins have the same age at last follow up
        if first.dead == '0' and second.dead == '0' and first.age != second.age:
            raise PedigreeError(
                "Monozygotic (MZ) twins identified with the character '" + t + "' have different "
                "ages. If both MZ twins are alive, they must have the same age at last follow up.")

        if first.sex() != second.sex():
            raise PedigreeError(
                "Monozygotic (MZ) twins identified with the character '" + t + "' have a different "
                "sex. MZ twins must have the same sex.")

        # Check that the MZ twins have the same genetic status
        if not GeneticTest.compareTestResults(first, second):
            raise PedigreeError(
                "Monozygotic (MZ) twins have both had a genetic test, but the genetic test results "
                "for these individuals are different. Under these circumstances, the genetic test "
                "results must be the same.")

    # Check to ensure that the maximum number of MZ twin pairs per pedigree has not been exceeded
    if len(twin_store) > settings.MAX_NUMBER_MZ_TWIN_PAIRS:
        raise PedigreeError(
            "Maximum number of MZ twin pairs has been exceeded. Input pedigrees must have a "
            "maximum of " + str(settings.MAX_NUMBER_MZ_TWIN_PAIRS) + " MZ twin pairs.")
def __init__(self, pedigree_records=None, people=None, file_type=None,
             bc_risk_factor_code=None, oc_risk_factor_code=None, bc_prs=None, oc_prs=None):
    """
    Build a pedigree from file records and/or already constructed people.

    @keyword pedigree_records: the pedigree records section of the BOADICEA import pedigree file.
    @keyword people: members of the pedigree.
    @keyword file_type: file type is 'bwa' or 'canrisk'.
    @keyword bc_risk_factor_code: breast cancer risk factor code
    @keyword oc_risk_factor_code: ovarian cancer risk factor code
    @keyword bc_prs: breast cancer PRS
    @keyword oc_prs: ovarian cancer PRS
    @raise PedigreeError: if the pedigree has no members, a Target column value is
    not '0' or '1', an individual ID is repeated, there is not exactly one target,
    or the number of family members is outside the configured limits.
    """
    self.people = []
    if pedigree_records:
        # The family ID is the first column of the first record.
        self.famid = pedigree_records[0].split()[0]
        seen_ids = set()  # set membership keeps the duplicate check linear
        for record in pedigree_records:
            p = Person.factory(record, file_type=file_type)
            if p.target != '0' and p.target != '1':
                raise PedigreeError(
                    "A value in the Target data column has been set to '" + p.target +
                    "'. Target column parameters must be set to '0' or '1'.")
            if p.is_target():
                self.target = p

            if p.pid in seen_ids:
                raise PedigreeError(
                    "Individual ID '" + p.pid + "' appears more than once in the pedigree file.")
            seen_ids.add(p.pid)
            self.people.append(p)

    if people is not None:
        self.people.extend(people)

    if not self.people:
        # Without this guard an empty pedigree fails below with an AttributeError
        # (self.famid unset) or IndexError instead of a PedigreeError.
        raise PedigreeError("Pedigree has no family members.")

    if people is not None:
        self.famid = self.people[0].famid

    ntarget = sum(1 for person in self.people if person.is_target())
    pedigree_size = len(self.people)
    if ntarget != 1:
        raise PedigreeError(
            "Pedigree (" + self.famid + ") has either no index or more than 1 " +
            "index individuals. Only one target can be specified.")
    if pedigree_size > settings.MAX_PEDIGREE_SIZE or pedigree_size < settings.MIN_BASELINE_PEDIGREE_SIZE:
        raise PedigreeError("Pedigree (" + self.famid + ") has unexpected number of family members " +
                            str(pedigree_size))

    # Risk factor codes and PRS are only stored for 'canrisk' files, and only when supplied.
    if file_type == 'canrisk':
        if bc_risk_factor_code is not None:
            self.bc_risk_factor_code = bc_risk_factor_code
        if oc_risk_factor_code is not None:
            self.oc_risk_factor_code = oc_risk_factor_code
        if bc_prs is not None:
            self.bc_prs = bc_prs
        if oc_prs is not None:
            self.oc_prs = oc_prs
def factory(ped_file_line, file_type=None):
    """
    Factory method for creating types of people given a record from
    a BOADICEA import pedigree file.

    @type  ped_file_line: str
    @param ped_file_line: Pedigree file line.
    @keyword file_type: 'bwa' selects the BWA column layout; any other value
    is parsed using the CanRisk column layout.
    @return: a Male or Female built from the record.
    @raise PedigreeError: if a genetic test column is missing, a genetic test or
    pathology field is malformed, or the sex is not 'M' or 'F'.
    """
    cols = ped_file_line.split()

    famid = cols[0]
    name = cols[1]
    pid = cols[3]

    def cancer_at(i):
        # A value of "0" in a cancer column is passed on as "-1".
        return Cancer(cols[i] if cols[i] != "0" else "-1")

    cancers = Cancers(bc1=cancer_at(11), bc2=cancer_at(12), oc=cancer_at(13),
                      prc=cancer_at(14), pac=cancer_at(15))

    # use column headers to get gene test type and result
    if file_type == 'bwa':
        gtests = BWSGeneticTests.factory([
            GeneticTest(cols[BwaPedigree.get_column_idx(gene + 't')],
                        cols[BwaPedigree.get_column_idx(gene + 'r')])
            for gene in settings.BC_MODEL['GENES']
        ])
        path = cols[27:32]
    else:
        genes = settings.BC_MODEL['GENES'] + settings.OC_MODEL['GENES'][2:]

        def get_genetic_test(cols, gene):
            idx = CanRiskPedigree.get_column_idx(gene)
            if idx < 0:
                # Closing quote and space were missing from this message.
                raise PedigreeError("Genetic test column for '" + gene + "' not found.")
            gt = cols[idx].split(':')
            if len(gt) < 2:
                # Report malformed input as a pedigree error rather than a bare IndexError.
                raise PedigreeError(
                    "Genetic test for '" + gene + "' has been set to '" + cols[idx] +
                    "'. It must be specified as two values separated by ':'.")
            return GeneticTest(gt[0], gt[1])

        gtests = CanRiskGeneticTests.factory(
            [get_genetic_test(cols, gene) for gene in genes])

        # The pathology results share the last CanRisk column, separated by ':'.
        path = cols[len(CanRiskPedigree.COLUMNS) - 1].split(':')

    if len(path) < 5:
        # Five results (ER, PR, HER2, CK14, CK5/6) are read below.
        raise PedigreeError(
            "Pathology test results for family member '" + name +
            "' are incomplete. Five results must be specified.")

    pathology = PathologyTests(
        er=PathologyTest(PathologyTest.ESTROGEN_RECEPTOR_TEST, path[0]),
        pr=PathologyTest(PathologyTest.PROGESTROGEN_RECEPTOR_TEST, path[1]),
        her2=PathologyTest(PathologyTest.HER2_TEST, path[2]),
        ck14=PathologyTest(PathologyTest.CK14_TEST, path[3]),
        ck56=PathologyTest(PathologyTest.CK56_TEST, path[4]))

    # Both sexes are constructed from the same columns; only the class differs.
    sex = cols[6]
    if sex == 'M':
        person_cls = Male
    elif sex == 'F':
        person_cls = Female
    else:
        raise PedigreeError(
            "The sex of family member '" + name + "' is invalid. An " +
            "individuals sex must be specified as 'M' or 'F' only.")

    return person_cls(
        famid, name, pid, fathid=cols[4], mothid=cols[5], target=cols[2], dead=cols[8],
        age=cols[9], yob=cols[10], ashkn=cols[16], cancers=cancers,
        mztwin=cols[7], gtests=gtests, pathology=pathology)