def test_calculations2(self):
    """
    Test prediction of cancer risk and mutation probability for an older
    target whose mother and maternal grandmother both have a first breast
    cancer diagnosis recorded.
    """
    pedigree = deepcopy(self.pedigree)
    target = pedigree.get_target()
    target.age = str(int(target.age) + 43)

    # sister
    # diagnoses = CancerDiagnoses(bc1=Cancer("20"), bc2=Cancer(), oc=Cancer(),
    #                             prc=Cancer(), pac=Cancer())
    # sister = Female("FAM1", "F01", "0011", target.fathid, target.mothid, age="22", yob=str(self.year-23),
    #                 cancers=Cancers(diagnoses=diagnoses))
    # pedigree.people.append(sister)

    # parents
    mother = pedigree.get_person(target.mothid)
    mother.yob = str(self.year - 84)
    mother.age = "85"
    mother.cancers = Cancers(bc1=Cancer("52"), bc2=Cancer(), oc=Cancer(), prc=Cancer(), pac=Cancer())

    # maternal grandparents
    (_maternal_grandfather, maternal_grandmother) = pedigree.add_parents(mother)
    maternal_grandmother.age = "81"
    maternal_grandmother.yob = "1912"
    maternal_grandmother.dead = "1"
    maternal_grandmother.cancers = Cancers(bc1=Cancer("42"), bc2=Cancer(), oc=Cancer(), prc=Cancer(), pac=Cancer())

    PedigreeFile.validate(pedigree)
    calcs = Predictions(pedigree, cwd=self.cwd)

    # each gene should have a mutation probability plus a result for no mutations
    genes = settings.BC_MODEL['GENES']
    allowed_keys = list(genes) + ["no mutation"]
    for mp in calcs.mutation_probabilties:
        key = list(mp.keys())[0]
        # assertIn reports the offending key on failure, unlike assertTrue
        self.assertIn(key, allowed_keys)
    self.assertEqual(len(calcs.mutation_probabilties), len(genes) + 1)

    # risks calculated at different ages:
    expected_ages = [64, 65, 66, 67, 68, 70, 73, 75, 80]
    self.assertEqual(len(calcs.cancer_risks), len(expected_ages))
    # assertEqual shows a list diff on failure instead of "False is not true"
    self.assertEqual([c.get('age') for c in calcs.cancer_risks], expected_ages)
def test_affected_unknown(self):
    """
    Check that flagging the target's mother as breast cancer
    'affected unknown' (AU) increases the target's breast cancer risk
    reported for age 80.
    """
    def risk_at_80(predictions):
        # decimal breast cancer risk of the entry for age 80, None if there is no such entry
        return next((entry['breast cancer risk']['decimal']
                     for entry in predictions.cancer_risks
                     if entry.get('age') == 80), None)

    pedigree = deepcopy(self.pedigree)
    mother = pedigree.get_person(pedigree.get_target().mothid)
    mother.yob = str(self.year - 55)
    mother.age = "55"
    baseline = Predictions(pedigree, cwd=self.cwd)

    # add affected unknown to mother
    mother.cancers = Cancers(bc1=Cancer("AU"), bc2=Cancer(), oc=Cancer(), prc=Cancer(), pac=Cancer())
    with_affected_unknown = Predictions(pedigree, cwd=self.cwd)

    self.assertGreater(
        risk_at_80(with_affected_unknown),
        risk_at_80(baseline),
        'Mother affected unknown increases BC risk in target')
def __init__(self, famid, name, pid, fathid, mothid, target="0", dead="0", age="0", yob="0", ashkn="0",
             mztwin="0", cancers=None, gtests=None, pathology=None):
    """
    Initialise a pedigree member.

    @type  famid: str
    @param famid: family/pedigree ID (hyphens are removed and the value is cut to 8 characters)
    @type  name: str
    @param name: name of the person (cut to 8 characters)
    @type  pid: str
    @param pid: person ID
    @type  fathid: str
    @param fathid: father ID
    @type  mothid: str
    @param mothid: mother ID
    @type  target: str
    @keyword target: subject of risk calculation
    @type  dead: str
    @keyword dead: alive specified as '0' and dead as '1'
    @type  age: str
    @keyword age: age at last follow up or age at death
    @type  yob: str
    @keyword yob: year of birth
    @type  ashkn: str
    @keyword ashkn: Ashkenazi origin parameter: '1' for Ashkenazi origin else '0'
    @type  mztwin: str
    @keyword mztwin: monozygotic (identical) twin
    @type  cancers: Cancers
    @keyword cancers: cancer status; a new Cancers() is created when omitted
    @keyword gtests: genetic tests; BWSGeneticTests.default_factory() is used when omitted
    @keyword pathology: pathology test results; PathologyTest.factory_default() is used when omitted
    """
    self.famid = famid.replace("-", "")[:8]  # remove hyphen and restrict to 8 chars
    self.name = name[:8]
    self.pid = pid
    self.fathid = fathid
    self.mothid = mothid
    self.target = target
    self.dead = dead
    self.age = age
    self.yob = yob
    self.ashkn = ashkn
    self.mztwin = mztwin
    # Defaults are built per call: a default expression in the signature is
    # evaluated only once, so every person created without these arguments
    # would otherwise share (and could mutate) the same objects.
    self.cancers = Cancers() if cancers is None else cancers    # cancers
    self.gtests = BWSGeneticTests.default_factory() if gtests is None else gtests   # genetic tests
    self.pathology = PathologyTest.factory_default() if pathology is None else pathology
def _get_pedi(self):
    """
    Build a single-person pedigree holding only a copy of the target.

    The copy has empty father/mother IDs, is marked alive, and carries the
    default genetic tests of the active model. If the target has a cancer
    diagnosis, only the age of the first breast cancer is carried over;
    otherwise a default Cancers() is used.

    @return: BwaPedigree when the model settings NAME is 'BC', else CanRiskPedigree
    """
    t = self.predictions.pedi.get_target()
    if t.cancers.is_cancer_diagnosed():
        cancers = Cancers(bc1=Cancer(t.cancers.diagnoses.bc1.age), bc2=Cancer(), oc=Cancer(),
                          prc=Cancer(), pac=Cancer())
    else:
        cancers = Cancers()

    is_bc_model = self.predictions.model_settings['NAME'] == 'BC'
    if is_bc_model:
        gtests = BWSGeneticTests.default_factory()
    else:
        gtests = CanRiskGeneticTests.default_factory()

    # compare by value: 'is' on a str literal tests object identity, which
    # only works by accident of interning and raises a SyntaxWarning on 3.8+
    person_cls = Male if t.sex() == "M" else Female
    new_t = person_cls(t.famid, t.name, t.pid, "", "", target=t.target, dead="0", age=t.age,
                       yob=t.yob, cancers=cancers, gtests=gtests)

    if is_bc_model:
        return BwaPedigree(people=[new_t])
    return CanRiskPedigree(people=[new_t])
def setUp(self):
    ''' Build pedigree data: a female target plus her parents, and a scratch directory. '''
    current_year = date.today().year
    self.year = current_year

    proband = Female(
        "FAM1", "F0", "001", "002", "003",
        target="1",
        age="20",
        yob=str(current_year - 20),
        cancers=Cancers())
    self.pedigree = BwaPedigree(people=[proband])

    # parents (the returned pair is not used by this fixture)
    _father, _mother = self.pedigree.add_parents(proband)

    self.cwd = tempfile.mkdtemp(prefix="TEST_", dir="/tmp")
def test_niceness(self):
    """
    Check Predictions._get_niceness for the fixture pedigree with
    different factors, and again after a sister has been added.
    """
    pedigree = deepcopy(self.pedigree)
    niceness = Predictions._get_niceness

    self.assertEqual(niceness(pedigree, factor=1), len(pedigree.people))
    self.assertEqual(niceness(pedigree, factor=0.01), 19)

    # sister
    target = pedigree.get_target()
    sister_cancers = Cancers(bc1=Cancer("20"))
    sister = Female(
        "FAM1", "F01", "0011", target.fathid, target.mothid,
        age="22",
        yob=str(self.year-23),
        cancers=sister_cancers)
    pedigree.people.append(sister)

    self.assertEqual(niceness(pedigree), 1)
def setUp(self):
    '''
    Build pedigree data: a BWA pedigree and a CanRisk pedigree, each with a
    female target and her parents, plus a scratch directory.
    '''
    current_year = date.today().year
    self.year = current_year
    target_yob = str(current_year - 20)

    bwa_target = Female("FAM1", "F0", "001", "002", "003", target="1", age="20",
                        yob=target_yob, cancers=Cancers())
    self.pedigree = BwaPedigree(people=[bwa_target])
    # parents
    _father, _mother = self.pedigree.add_parents(bwa_target)

    # canrisk pedigree
    canrisk_target = Female("FAM1", "F0", "001", "002", "003", target="1", age="20",
                            yob=target_yob, cancers=Cancers(),
                            gtests=CanRiskGeneticTests.default_factory())
    self.canrisk_pedigree = CanRiskPedigree(people=[canrisk_target])
    _father, canrisk_mother = self.canrisk_pedigree.add_parents(
        canrisk_target, gtests=CanRiskGeneticTests.default_factory())

    # mother of the CanRisk target: aged 55 with two breast cancer ages recorded
    canrisk_mother.yob = str(current_year - 55)
    canrisk_mother.age = "55"
    canrisk_mother.cancers = Cancers(bc1=Cancer("54"), bc2=Cancer("55"), oc=Cancer(),
                                     prc=Cancer(), pac=Cancer())

    self.cwd = tempfile.mkdtemp(prefix="TEST_", dir="/tmp")
def _get_pedi(self):
    """
    Build a single-person pedigree from the target of the parent class's
    pedigree, replacing the cancer record with a default Cancers().

    The copy has empty father/mother IDs, is marked alive, and carries the
    default genetic tests of the active model.

    @return: BwaPedigree when the model settings NAME is 'BC', else CanRiskPedigree
    """
    t = super()._get_pedi().get_target()
    cancers = Cancers()

    is_bc_model = self.predictions.model_settings['NAME'] == 'BC'
    if is_bc_model:
        gtests = BWSGeneticTests.default_factory()
    else:
        gtests = CanRiskGeneticTests.default_factory()

    # compare by value: 'is' on a str literal tests object identity, which
    # only works by accident of interning and raises a SyntaxWarning on 3.8+
    person_cls = Male if t.sex() == "M" else Female
    new_t = person_cls(t.famid, t.name, t.pid, "", "", target=t.target, dead="0", age=t.age,
                       yob=t.yob, cancers=cancers, gtests=gtests)

    if is_bc_model:
        return BwaPedigree(people=[new_t])
    return CanRiskPedigree(people=[new_t])
def test_ovarian_calculations(self):
    """
    Test prediction of cancer risk and mutation probability using the
    ovarian cancer model settings on a CanRisk pedigree.
    """
    target = Female("FAM1", "F0", "001", "002", "003", target="1", age="20",
                    yob=str(self.year - 20), cancers=Cancers(),
                    gtests=CanRiskGeneticTests.default_factory())
    pedigree = CanRiskPedigree(people=[target])
    # parents
    (_father, _mother) = pedigree.add_parents(
        target, gtests=CanRiskGeneticTests.default_factory())
    PedigreeFile.validate(pedigree)

    params = ModelParams(
        mutation_frequency=settings.OC_MODEL['MUTATION_FREQUENCIES']["UK"],
        mutation_sensitivity=settings.OC_MODEL['GENETIC_TEST_SENSITIVITY'])
    calcs = Predictions(pedigree, cwd=self.cwd, model_params=params,
                        model_settings=settings.OC_MODEL, calcs=[])

    # each gene should have a mutation probability plus a result for no mutations
    genes = settings.OC_MODEL['GENES']
    allowed_keys = list(genes) + ["no mutation"]
    for mp in calcs.mutation_probabilties:
        key = list(mp.keys())[0]
        # assertIn reports the offending key on failure, unlike assertTrue
        self.assertIn(key, allowed_keys)
    self.assertEqual(len(calcs.mutation_probabilties), len(genes) + 1)

    # risks calculated at 16 different ages:
    expected_ages = [21, 22, 23, 24, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80]
    self.assertEqual(len(calcs.cancer_risks), 16)
    # assertEqual shows a list diff on failure instead of "False is not true"
    self.assertEqual([c.get('age') for c in calcs.cancer_risks], expected_ages)
def factory(ped_file_line, file_type=None):
    '''
    Factory method for creating types of people given a record from
    a BOADICEA import pedigree file.

    @type  ped_file_line: str
    @param ped_file_line: Pedigree file line (whitespace separated columns).
    @type  file_type: str
    @keyword file_type: 'bwa' to read genetic test and pathology values from
        separate columns; any other value reads the colon separated
        CanRisk style columns.
    @return: Male or Female, depending on the sex column ('M' or 'F')
    @raise PedigreeError: if a genetic test column is not found (CanRisk style
        only) or the sex column is neither 'M' nor 'F'
    '''
    cols = ped_file_line.split()

    famid = cols[0]
    name = cols[1]
    pid = cols[3]

    def cancer_at(idx):
        # a "0" in a cancer column is passed to Cancer as "-1"
        return Cancer(cols[idx] if cols[idx] != "0" else "-1")

    cancers = Cancers(bc1=cancer_at(11), bc2=cancer_at(12), oc=cancer_at(13),
                      prc=cancer_at(14), pac=cancer_at(15))

    # use column headers to get gene test type and result
    if file_type == 'bwa':
        gtests = BWSGeneticTests.factory([
            GeneticTest(cols[BwaPedigree.get_column_idx(gene + 't')],
                        cols[BwaPedigree.get_column_idx(gene + 'r')])
            for gene in settings.BC_MODEL['GENES']])
        path = [cols[27], cols[28], cols[29], cols[30], cols[31]]
    else:
        genes = settings.BC_MODEL['GENES'] + settings.OC_MODEL['GENES'][2:]

        def get_genetic_test(cols, gene):
            idx = CanRiskPedigree.get_column_idx(gene)
            if idx < 0:
                # message previously lacked the closing quote and the space
                raise PedigreeError("Genetic test column for '" + gene + "' not found.")
            # column holds "<test type>:<result>"
            gt = cols[idx].split(':')
            return GeneticTest(gt[0], gt[1])

        gtests = CanRiskGeneticTests.factory(
            [get_genetic_test(cols, gene) for gene in genes])
        # the last CanRisk column holds the five pathology results, colon separated
        path = cols[len(CanRiskPedigree.COLUMNS) - 1].split(':')

    pathology = PathologyTests(
        er=PathologyTest(PathologyTest.ESTROGEN_RECEPTOR_TEST, path[0]),
        pr=PathologyTest(PathologyTest.PROGESTROGEN_RECEPTOR_TEST, path[1]),
        her2=PathologyTest(PathologyTest.HER2_TEST, path[2]),
        ck14=PathologyTest(PathologyTest.CK14_TEST, path[3]),
        ck56=PathologyTest(PathologyTest.CK56_TEST, path[4]))

    sex = cols[6]
    if sex == 'M':
        person_cls = Male
    elif sex == 'F':
        person_cls = Female
    else:
        raise PedigreeError(
            "The sex of family member '" + name + "' is invalid. An " +
            "individuals sex must be specified as 'M' or 'F' only.")

    return person_cls(famid, name, pid, fathid=cols[4], mothid=cols[5],
                      target=cols[2], dead=cols[8], age=cols[9], yob=cols[10],
                      ashkn=cols[16], cancers=cancers, mztwin=cols[7],
                      gtests=gtests, pathology=pathology)
def convert2csv(filename, csvfilename, censoring_ages_freq=[1, 5, 10]):
    '''
    Convert pedigree file to a CSV file for batch processing.

    Every pedigree is written once per censoring age. The family ID of each
    row is suffixed with ":<censoring age>", and only the target's row
    carries the censoring age and the risk factor values.

    @param filename - pedigree file name
    @param csvfilename - name of output CSV file
    @param censoring_ages_freq - year offsets from the target's age; an age
        between target age + 1 and 80 is used as a censoring age when
        (age - target age) is in this collection. Age 80 is always added.
        The collection is only read, never modified.
    '''
    def or_blank(value):
        # "0" is written to the CSV as an empty cell
        return value if value != "0" else ""

    with open(filename, 'r') as f:
        pedigree_data = f.read()

    pf = PedigreeFile(pedigree_data)
    cheaders = get_rf_values(pedigree_data)
    genes = Genes.get_all_model_genes()

    hdr = [
        "FamID", "Name", "Proband", "IndivID", "FathID", "MothID", "Sex",
        "MZtwin", "Dead", "Age", "Yob", "BrCa_1st", "BrCa_2nd", "OvCa",
        "ProCa", "PanCa", "Ashkn"
    ]
    for gene in genes:
        hdr.extend([gene + "t", gene + "r"])
    hdr.extend(["ER", "PR", "HER2", "CK14", "CK56", "Censoring_Age"])
    hdr += RISK_FACTORS  # add risk factors to the header

    # the context manager closes the output file even if a row fails part-way
    with open(csvfilename, "w") as csv_file:
        csv_file.write(",".join(str(col) for col in hdr) + "\n")

        for ped in pf.pedigrees:
            tage = int(ped.get_target().age)
            calc_ages = [str(alf) for alf in range(tage + 1, 81)
                         if alf - tage in censoring_ages_freq]
            # NOTE(review): when (80 - target age) is in censoring_ages_freq,
            # "80" is listed twice and the pedigree is written twice with the
            # same ":80" suffix - original behaviour kept, confirm it is intended.
            calc_ages.append("80")

            for censoring_age in calc_ages:
                for person in ped.people:
                    is_target = person.target == "1"
                    row = [
                        person.famid + ":" + censoring_age,
                        person.name,
                        person.target if is_target else "",
                        person.pid,
                        or_blank(person.fathid),
                        or_blank(person.mothid),
                        person.sex(),
                        or_blank(person.mztwin),
                        person.dead,
                        person.age,
                        or_blank(person.yob),
                    ]

                    # cancer ages: 'AU' and '-1' are written as empty cells
                    diagnoses = person.cancers.diagnoses
                    for c in Cancers.get_cancers():
                        dx_age = getattr(diagnoses, c).age
                        row.append(dx_age if dx_age != 'AU' and dx_age != '-1' else "")

                    row.append(person.ashkn)

                    # genetic tests: test type then result for each gene
                    gtests = person.gtests
                    for g in genes:
                        gt = getattr(gtests, g.lower())
                        row.append(or_blank(gt.test_type))
                        row.append(or_blank(gt.result))

                    p = person.pathology
                    for ptest in (p.er, p.pr, p.her2, p.ck14, p.ck56):
                        row.append(or_blank(ptest.result))

                    row.append(censoring_age if is_target else "")

                    # risk factors are only filled in on the target's row
                    this_rfs = cheaders[person.famid] if person.famid in cheaders else None
                    for rf in RISK_FACTORS:
                        if is_target and this_rfs is not None and rf in this_rfs:
                            row.append(this_rfs[rf])
                        else:
                            row.append("")

                    csv_file.write(",".join(str(cell) for cell in row) + "\n")