def test_write_headers(self):
    """Write the header parsed from self.lines and compare the file to self.lines."""
    parsed_header = _get_vcf_header_from_lines(self.lines)
    with temp_dir.TempDir() as scratch_dir:
        # Local is called output_path so it does not reuse the name of the
        # standard-library tempfile module.
        output_path = scratch_dir.create_temp_file(suffix='.vcf')
        WriteVcfHeaderFn(output_path).process(parsed_header)
        self._assert_file_contents_equal(output_path, self.lines)
def test_write_contig(self):
    """Check the header line produced for a contig entry.

    The input contig line carries ID, length, assembly, md5 and species;
    the expected output line contains only ID and length.
    """
    self.lines = [
        '##contig=<ID=M,length=16,assembly=B37,md5=c6,species="Homosapiens">\n',
        self.lines[-1],
    ]
    header = _get_vcf_header_from_lines(self.lines)
    header_fn = WriteVcfHeaderFn('')
    # Take the first contig without subscripting values(): on Python 3 a
    # dict's values() is a view and does not support indexing. next(iter(...))
    # works for both a list and a view.
    first_contig = next(iter(header.contigs.values()))
    actual = header_fn._to_vcf_header_line('contig', first_contig)
    expected = '##contig=<ID=M,length=16>\n'
    self.assertEqual(actual, expected)
def test_info_source_and_version(self):
    """Check that an INFO line with Source and Version fields round-trips.

    The line is parsed into a header and formatted back; the result must
    equal the original input line.
    """
    self.lines = [
        '##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth",'
        'Source="source",Version="version">\n',
        self.lines[-1]
    ]
    header = _get_vcf_header_from_lines(self.lines)
    header_fn = WriteVcfHeaderFn('')
    # Take the first INFO entry without subscripting values(): on Python 3 a
    # dict's values() is a view and does not support indexing. next(iter(...))
    # works for both a list and a view.
    first_info = next(iter(header.infos.values()))
    actual = header_fn._to_vcf_header_line('INFO', first_info)
    expected = self.lines[0]
    self.assertEqual(actual, expected)
def test_to_vcf_header_line(self):
    """Format a hand-built INFO field mapping and compare to the expected line."""
    info_fields = collections.OrderedDict([
        ('id', 'NS'),
        ('num', 1),
        ('type', 'Integer'),
        ('desc', 'Number samples'),
    ])
    expected_line = (
        '##INFO=<ID=NS,Number=1,Type=Integer,'
        'Description="Number samples">\n'
    )
    writer = WriteVcfHeaderFn('')
    formatted_line = writer._to_vcf_header_line('INFO', info_fields)
    self.assertEqual(formatted_line, expected_line)
def test_write_info_number_types(self):
    """Format INFO lines whose Number is 1, A, G and R and compare to the input.

    The comparison ignores order (assertCountEqual).
    """
    self.lines = [
        '##INFO=<ID=NS,Number=1,Type=Integer,Description="Number samples">\n',
        '##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">\n',
        '##INFO=<ID=HG,Number=G,Type=Integer,Description="IntInfo_G">\n',
        '##INFO=<ID=HR,Number=R,Type=String,Description="ChrInfo_R">\n',
        self.lines[-1],
    ]
    parsed_header = _get_vcf_header_from_lines(self.lines)
    writer = WriteVcfHeaderFn('')
    info_entries = list(parsed_header.infos.values())
    formatted_lines = [
        writer._to_vcf_header_line('INFO', entry) for entry in info_entries
    ]
    # The last element of self.lines is not an INFO line, so it is excluded.
    info_lines = self.lines[:-1]
    self.assertCountEqual(formatted_lines, info_lines)
def test_write_headers_with_vcf_version_line(self):
    """Write the header together with a fileformat line and check the file.

    The file is read back in binary mode and compared, ignoring order,
    against the expected byte lines.
    """
    parsed_header = _get_vcf_header_from_lines(self.lines)
    vcf_version_line = '##fileformat=VCFv4.3\n'
    # NOTE(review): the lines after the version line presumably mirror the
    # self.lines fixture built outside this method -- confirm against setUp.
    expected_results = [
        vcf_version_line.encode('utf-8'),
        b'##INFO=<ID=NS,Number=1,Type=Integer,Description="Number samples">\n',
        b'##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">\n',
        b'##INFO=<ID=HG,Number=G,Type=Integer,Description="IntInfo_G">\n',
        b'##INFO=<ID=HR,Number=R,Type=String,Description="ChrInfo_R">\n',
        b'##FILTER=<ID=MPCBT,Description="Mate pair count below 10">\n',
        b'##ALT=<ID=INS:ME:MER,Description="Insertion of MER element">\n',
        b'##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n',
        b'##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="GQ">\n',
        b'#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT\n'
    ]
    with temp_dir.TempDir() as scratch_dir:
        output_path = scratch_dir.create_temp_file(suffix='.vcf')
        writer = WriteVcfHeaderFn(output_path)
        writer.process(parsed_header, vcf_version_line)
        with open(output_path, 'rb') as written:
            written_lines = list(written)
        self.assertCountEqual(written_lines, expected_results)
def test_raises_error_for_invalid_num(self):
    """Formatting a 'num' value of -4 must raise ValueError."""
    invalid_header = collections.OrderedDict([('num', -4)])
    writer = WriteVcfHeaderFn('')
    with self.assertRaises(ValueError):
        writer._format_header_key_value('num', invalid_header['num'])