示例#1
0
文件: test_vcf.py 项目: BadSeby/scout
def test_vcf():
    """Collect ped/vcf paths per directory and print every parsed family.

    Walks the hard-coded ``families_path`` tree. Each directory visited gets
    a running index, and the ``.ped`` / ``.vcf`` files found in it are stored
    under that index. Every collected ped file is then parsed with
    ``ped_parser.FamilyParser`` and its ``make_json()`` result is printed,
    followed by the collected path mapping itself.
    """
    families_path = '/vagrant/scout/tests/vcf_examples'
    # Maps directory index -> {'ped': path, 'vcf': path}. A key only exists
    # when a file with that extension was found in the directory.
    families = {}
    for index, (root, dirs, files) in enumerate(os.walk(families_path)):
        print('root: %s, dirs: %s , files: %s' % (str(root), str(dirs), str(files)))
        for filename in files:
            print('File: %s' % filename)
            extension = os.path.splitext(filename)[-1]
            if extension in ('.ped', '.vcf'):
                # Drop the leading dot so the extension doubles as the key.
                families.setdefault(index, {})[extension[1:]] = os.path.join(root, filename)
    for index in families:
        ped_path = families[index].get('ped')
        if ped_path is None:
            # A directory that only holds a vcf has no pedigree to parse;
            # indexing ['ped'] here used to raise KeyError.
            print('No ped file found for family %s, skipping' % index)
            continue
        my_family = ped_parser.FamilyParser(ped_path)
        # Serialise once and reuse the result for the type lookup.
        family_json = my_family.make_json()
        print(family_json, type(family_json))
    pp(families)
示例#2
0
def get_family(args):
    """Parse the pedigree file named in ``args`` and return its family.

    The first entry of ``args.family_file`` is handed to
    ``parser.FamilyParser`` with the family type fixed to ``'ped'``.
    """
    ped_source = args.family_file[0]
    pedigree = parser.FamilyParser(ped_source, 'ped')
    # Only one family is looked at for now, so take whichever entry
    # popitem() yields and discard its key.
    _family_id, family = pedigree.families.popitem()
    return family
 def test_standard_trio(self):
     """Check that the trio ped file behind ``self.trio_file`` parses correctly.

     Verifies the parsed header, that the family
     ``healthyParentsAffectedSon`` is present, and that both its
     individuals and its first trio consist of proband, mother and father.
     """
     # Close the handle as soon as the parser has been built instead of
     # leaking it for the rest of the test run.
     with open(self.trio_file.name, 'r') as handle:
         family_parser = parser.FamilyParser(handle)
     assert family_parser.header == [
         'family_id', 'sample_id', 'father_id', 'mother_id', 'sex',
         'phenotype'
     ]
     assert 'healthyParentsAffectedSon' in family_parser.families
     family = family_parser.families['healthyParentsAffectedSon']
     expected_members = {'proband', 'mother', 'father'}
     assert expected_members == set(family.individuals.keys())
     assert expected_members == set(family.trios[0])
示例#4
0
def test_alternative_parser():
    """Test parsing a ped file with alternative formatting.

    Loads ``tests/fixtures/alternative.ped`` with ``family_type='alt'`` and
    checks the single family, its single individual, and that individual's
    ``Capture_kit`` entry in ``extra_info``.
    """
    # Parse with the alternative ('alt') family type.
    with codecs.open('tests/fixtures/alternative.ped', 'r') as handle:
        family_parser = parser.FamilyParser(handle, family_type='alt')

    # We've only loaded one family. Take it through an iterator: indexing
    # ``.values()`` directly fails on Python 3, where it is a view.
    ped = next(iter(family_parser.families.values()))

    assert ped.family_id == 'family_id'
    assert len(ped.individuals) == 1

    sample = next(iter(ped.individuals.values()))
    assert sample.extra_info['Capture_kit'] == 'Agilent_SureSelect.V5'
示例#5
0
    def get_cases(self, cases_path):
        """Collect one case per folder below ``cases_path`` into ``self._cases``.

        Every directory that contains files must hold a ``.ped`` file and a
        variant file: a plain ``.vcf``, or a ``.vcf.gz`` as fallback when no
        plain one is present. The ped file is parsed with
        ``ped_parser.FamilyParser``; the first entry of its ``get_json()``
        result becomes the case, extended with ``id`` (copied from
        ``family_id``) and ``vcf_path``.

        Args:
            cases_path: Root directory to walk.

        Returns:
            None. Cases are appended to ``self._cases``.

        Raises:
            SyntaxError: If a directory with files lacks a ped and/or vcf file.
        """
        # Loop over the case folders. Structure is described in documentation.
        for root, _dirs, files in os.walk(cases_path):
            if not files:
                continue
            ped_file = None
            vcf_file = None
            zipped_vcf_file = None
            case = None
            for filename in files:
                extension = os.path.splitext(filename)[-1]
                if extension == '.ped':
                    ped_file = os.path.join(root, filename)
                    case_parser = ped_parser.FamilyParser(ped_file)
                    case = case_parser.get_json()[0]
                elif extension == '.vcf':
                    vcf_file = os.path.join(root, filename)
                elif filename.endswith('.vcf.gz'):
                    # The old check compared one character against '.gz',
                    # so zipped vcf files were never picked up.
                    zipped_vcf_file = os.path.join(root, filename)
            # If no vcf we fall back to the zipped file.
            if not vcf_file:
                vcf_file = zipped_vcf_file
            # If ped and vcf are not found exit:
            if not (ped_file and vcf_file):
                raise SyntaxError(
                    'Wrong folder structure in vcf directories. '
                    'Could not find ped and/or vcf files. '
                    'See documentation.')
            # Store the family id as case id and remember the variant path.
            case['id'] = case['family_id']
            case['vcf_path'] = vcf_file
            self._cases.append(case)
 def test_standard_trio_proband_missing_column(self):
     """Check that parsing ``self.trio_file`` raises ``WrongLineFormat``.

     NOTE(review): the fixture is set up outside this method; presumably it
     holds a trio whose proband line lacks a column -- confirm there.
     """
     # Open through a context manager so the handle is closed even though
     # the parser is expected to fail.
     with open(self.trio_file.name, 'r') as handle:
         with pytest.raises(WrongLineFormat):
             parser.FamilyParser(handle)
 def test_standard_trio_missing_father(self):
     """Check that parsing ``self.trio_file`` raises ``PedigreeError``.

     NOTE(review): the fixture is set up outside this method; presumably it
     holds a trio that refers to a father who is not listed -- confirm there.
     """
     # Open through a context manager so the handle is closed even though
     # the parser is expected to fail.
     with open(self.trio_file.name, 'r') as handle:
         with pytest.raises(PedigreeError):
             parser.FamilyParser(handle)