def family_from_indiv_id_list(indiv_id_list, project_id, family_id):
    """
    Build a Family containing one Individual per id in indiv_id_list.

    Every Individual is constructed from a dict carrying the given
    project_id and family_id plus its own indiv_id; the Family is
    constructed from a dict with the same project_id and family_id.

    :param indiv_id_list: iterable of individual ids
    :param project_id: project id stored on the family and on each individual
    :param family_id: family id stored on the family and on each individual
    :return: the Family, after add_individual() was called for each member
    """
    # Create all the members up front, before the family itself exists.
    members = []
    for member_id in indiv_id_list:
        record = {
            'project_id': project_id,
            'family_id': family_id,
            'indiv_id': member_id,
        }
        members.append(Individual(record))

    result = Family({'project_id': project_id, 'family_id': family_id})
    for member in members:
        result.add_individual(member)
    return result
def get_individuals_from_fam_file(fam_file, project_id='.'):
    """
    Returns a list of individuals from a FAM file

    Rows are split on tab characters and read by position:
    family id, individual id, paternal id, maternal id, sex, affected
    status. The four ids are passed through
    slugify(..., separator='_', replace_dot=True); a parental id that comes
    back as "0" is stored as ".".

    Sex is 'female' for "2" or a value starting with F/f, 'male' for "1" or
    a value starting with M/m, otherwise 'unknown'. Affected status is
    'affected' for "2" or a value starting with A/a, 'unaffected' for "1"
    or a value starting with U/u, otherwise 'unknown'.

    Blank (whitespace-only) rows and rows starting with '#' are skipped.

    :param fam_file: iterable of text lines (e.g. an open file object)
    :param project_id: project id given to every Individual created
    :return: list of Individual objects, in row order
    :raises ValueError: if a row cannot be parsed (e.g. too few columns)
    """
    individuals = []
    for line in fam_file:
        # Reset per row so the error message below can never report the
        # previous row's fields (or fail because the name is unbound).
        fields = None
        try:
            # ignore these rows. Lines read from a file keep their line
            # terminator, so a blank row is "\n" (or "\r\n"), never ''.
            if line.strip() == '' or line.startswith('#'):
                continue

            # Strip '\r' as well as '\n': with Windows line endings the
            # status column would otherwise be e.g. "2\r" and not match.
            fields = line.strip('\r\n').split('\t')

            indiv_id = slugify(fields[1], separator='_', replace_dot=True)
            family_id = slugify(fields[0], separator='_', replace_dot=True)

            paternal_id = slugify(fields[2], separator='_', replace_dot=True)
            if paternal_id == "0":
                paternal_id = "."

            maternal_id = slugify(fields[3], separator='_', replace_dot=True)
            if maternal_id == "0":
                maternal_id = "."

            sex_code = fields[4]
            gender = 'unknown'
            if sex_code == '2' or sex_code.upper().startswith('F'):
                gender = 'female'
            elif sex_code == '1' or sex_code.upper().startswith('M'):
                gender = 'male'

            status_code = fields[5]
            affected_status = 'unknown'
            if status_code == '2' or status_code.upper().startswith('A'):
                affected_status = 'affected'
            elif status_code == '1' or status_code.upper().startswith('U'):
                affected_status = 'unaffected'
        except Exception as e:
            # Any parsing failure is reported as ValueError together with
            # the offending row, so callers only need to handle one type.
            raise ValueError(
                "Couldn't parse line: %(line)s. Fields: %(fields)s. exception: %(e)s"
                % {'line': line, 'fields': fields, 'e': e})

        indiv = Individual(
            indiv_id,
            project_id=project_id,
            family_id=family_id,
            paternal_id=paternal_id,
            maternal_id=maternal_id,
            gender=gender,
            affected_status=affected_status,
        )
        individuals.append(indiv)

    return individuals
def get_control_cohort(self, population):
    """
    Build the 'control_cohort' Cohort for the given population.

    The member ids come from
    self.get_individuals_for_family(population, 'control_cohort'); each id
    is wrapped in an Individual and the resulting list is handed to a
    Cohort whose project_id is the population.

    :param population: population identifier, used both for the id lookup
        and as the cohort's project_id
    :return: the Cohort named 'control_cohort'
    """
    cohort_id = 'control_cohort'
    member_ids = self.get_individuals_for_family(population, cohort_id)

    members = []
    for member_id in member_ids:
        # NOTE(review): control individuals are created with
        # affected_status='affected' - presumably intentional; confirm.
        members.append(Individual(member_id, affected_status='affected'))

    return Cohort(cohort_id, members, project_id=population)