def test_vcf():
    """Collect ped/vcf files from the example directory and print them.

    Every directory visited by ``os.walk`` gets its own running index; the
    ``.ped`` and ``.vcf`` paths found in that directory are stored under the
    index.  Each collected ped path is then handed to
    ``ped_parser.FamilyParser`` and the result of ``make_json()`` is printed
    together with its type.  Finally the collected mapping is pretty-printed.
    """
    families_path = '/vagrant/scout/tests/vcf_examples'
    # directory index -> {'ped': <path>, 'vcf': <path>}
    families = {}

    for index, (root, dirs, files) in enumerate(os.walk(families_path)):
        print('root: %s, dirs: %s , files: %s' %
              (str(root), str(dirs), str(files)))
        for filename in files:
            print('File: %s' % filename)
            extension = os.path.splitext(filename)[-1]
            if extension == '.ped':
                families.setdefault(index, {})['ped'] = os.path.join(
                    root, filename)
            if extension == '.vcf':
                families.setdefault(index, {})['vcf'] = os.path.join(
                    root, filename)

    for entry in families.values():
        # NOTE(review): a directory holding a .vcf but no .ped has no 'ped'
        # key, so this lookup raises KeyError for it.
        my_family = ped_parser.FamilyParser(entry['ped'])
        json_output = my_family.make_json()
        json_type = type(my_family.make_json())
        print(json_output, json_type)

    pp(families)
def get_family(args):
    """Build a family parser from the command line arguments, return a family.

    The first element of ``args.family_file`` is passed to
    ``parser.FamilyParser`` together with the family type ``'ped'``.  One
    entry is then popped from the parser's ``families`` mapping and its value
    is returned.
    """
    family_source = args.family_file[0]
    family_parser = parser.FamilyParser(family_source, 'ped')
    # Only one family is looked at for now, so simply take whichever entry
    # popitem() hands back (this also removes it from the parser's mapping).
    popped_entry = family_parser.families.popitem()
    return popped_entry[1]
def test_standard_trio(self):
    """Check that the trio ped file in ``self.trio_file`` is parsed correctly.

    Verifies the parsed header, that the family
    ``healthyParentsAffectedSon`` is present, and that both its individuals
    and its first trio consist of exactly proband, mother and father.
    """
    expected_members = {'proband', 'mother', 'father'}

    # Open through a context manager so the handle is always closed, also
    # when an assertion fails.  The assertions stay inside the block so the
    # test does not depend on when the parser reads from the handle.
    with open(self.trio_file.name, 'r') as handle:
        family_parser = parser.FamilyParser(handle)

        assert family_parser.header == [
            'family_id',
            'sample_id',
            'father_id',
            'mother_id',
            'sex',
            'phenotype'
        ]
        assert 'healthyParentsAffectedSon' in family_parser.families

        family = family_parser.families['healthyParentsAffectedSon']
        assert expected_members == set(family.individuals.keys())
        assert expected_members == set(family.trios[0])
def test_alternative_parser():
    """Test parsing a ped file with alternative formatting.

    Parses ``tests/fixtures/alternative.ped`` with ``family_type='alt'`` and
    checks the single loaded family, its single individual and that
    individual's ``Capture_kit`` extra info.
    """
    with codecs.open('tests/fixtures/alternative.ped', 'r') as handle:
        family_parser = parser.FamilyParser(handle, family_type='alt')

        # We've only loaded one family.  ``values()`` is not subscriptable on
        # Python 3, so take the first element through an iterator; this works
        # on Python 2 as well.
        ped = next(iter(family_parser.families.values()))
        assert ped.family_id == 'family_id'
        assert len(ped.individuals) == 1

        sample = next(iter(ped.individuals.values()))
        assert sample.extra_info['Capture_kit'] == 'Agilent_SureSelect.V5'
def get_cases(self, cases_path):
    """Collect one case per populated directory below ``cases_path``.

    The tree is traversed with ``os.walk``.  Directories without files are
    skipped.  Every directory that does contain files must hold a ``.ped``
    file and a VCF: a plain ``.vcf`` is preferred, a ``.vcf.gz`` is used as
    fallback when no plain one is present.  The ped file is parsed with
    ``ped_parser.FamilyParser`` and the first element of ``get_json()``
    becomes the case.  The case gets an ``id`` (copied from its
    ``family_id``) and a ``vcf_path`` and is appended to ``self._cases``.

    Args:
        cases_path: Root directory of the case folders.

    Returns:
        None.  The cases are accumulated in ``self._cases``.

    Raises:
        SyntaxError: If a directory with files lacks a ped file or a vcf
            file.
    """
    for root, _dirs, files in os.walk(cases_path):
        if not files:
            continue

        ped_file = None
        vcf_file = None
        zipped_vcf_file = None
        case = None

        for filename in files:
            path = os.path.join(root, filename)
            # Check the double extension first: splitext() only strips the
            # last suffix, so 'x.vcf.gz' would otherwise just look like '.gz'.
            if filename.endswith('.vcf.gz'):
                zipped_vcf_file = path
                continue

            extension = os.path.splitext(filename)[-1]
            if extension == '.ped':
                ped_file = path
                case_parser = ped_parser.FamilyParser(ped_file)
                case = case_parser.get_json()[0]
            elif extension == '.vcf':
                vcf_file = path

        # If there is no plain vcf, fall back to the zipped one.
        if not vcf_file:
            vcf_file = zipped_vcf_file

        # Without both a ped and a vcf the folder layout is invalid.
        if not (ped_file and vcf_file):
            raise SyntaxError(
                'Wrong folder structure in vcf directories. '
                'Could not find ped and/or vcf files. '
                'See documentation.')

        # The family id doubles as case id; remember where the variants live.
        case['id'] = case['family_id']
        case['vcf_path'] = vcf_file
        self._cases.append(case)

    return
def test_standard_trio_proband_missing_column(self):
    """Check that parsing ``self.trio_file`` raises ``WrongLineFormat``.

    NOTE(review): judging by the test name the fixture presumably contains a
    proband line with a missing column -- confirm against the test setup.
    """
    # Open through a context manager so the handle is closed even though the
    # parser is expected to raise.
    with open(self.trio_file.name, 'r') as handle:
        with pytest.raises(WrongLineFormat):
            parser.FamilyParser(handle)
def test_standard_trio_missing_father(self):
    """Check that parsing ``self.trio_file`` raises ``PedigreeError``.

    NOTE(review): judging by the test name the fixture presumably describes a
    trio whose father is missing -- confirm against the test setup.
    """
    # Open through a context manager so the handle is closed even though the
    # parser is expected to raise.
    with open(self.trio_file.name, 'r') as handle:
        with pytest.raises(PedigreeError):
            parser.FamilyParser(handle)