def missing_from_vcf(family_set, vcf_file):
    """
    Get indiv_ids that are listed in the families of family_set but are
    missing from the VCF file.

    family_set: iterable of mappings, each with an 'individuals' entry that
        iterates over indiv_ids.
    vcf_file: passed through to vcf_stuff.get_ids_from_vcf_path.

    Returns a list of the unique indiv_ids found in the families that are not
    among the ids reported for the VCF. Order of the list is not defined.
    Ids that appear only in the VCF are ignored here.
    """
    expected_ids = set()
    for family in family_set:
        expected_ids.update(family['individuals'])

    # The families are collected first and the VCF is read afterwards, so a
    # malformed family_set fails before the file is touched.
    expected_ids.difference_update(vcf_stuff.get_ids_from_vcf_path(vcf_file))
    return list(expected_ids)
def extra_indivs_in_vcf(family_set, vcf_file):
    """
    Get indiv_ids in the VCF file that do not belong to any family in
    family_set.

    Individuals that appear in a family but not in the VCF are ignored.
    Each id is reported once, even if the VCF id listing repeats it.
    """
    # Read the VCF ids before walking the families.
    vcf_ids = vcf_stuff.get_ids_from_vcf_path(vcf_file)
    unique_vcf_ids = dict.fromkeys(vcf_ids)

    family_ids = set()
    for family in family_set:
        family_ids.update(family['individuals'])

    return [vcf_id for vcf_id in unique_vcf_ids if vcf_id not in family_ids]
def sample_id_list(self):
    """
    Return the ids that vcf_stuff.get_ids_from_vcf_path reports for the file
    at self.path().
    """
    vcf_path = self.path()
    return vcf_stuff.get_ids_from_vcf_path(vcf_path)