def execute(self):
    """Sort the records of all input VCF files and write them to the output handle.

    Each name in ``self.vcf_file_names`` is opened (through ``gzip`` when it
    ends in ``.gz``) and handed to ``l_bp.parse_vcf`` together with
    ``self.vcf_lines`` and ``self.vcf_headers``.  A ``##SAMPLE`` header line is
    appended for every sample name the parser returns.  Whenever more than
    ``self.batchsize`` files have been read since the last flush, the lines
    collected so far are sorted and ``self.write_temp_file()`` is called.
    Finally the header is written and the temp files plus the remaining
    in-memory lines are merged into ``self.output_handle``.
    """
    counter = 0
    for vcf_file_name in self.vcf_file_names:
        # TODO This is very similar to what we do in vcfpaste
        # Should abstract out in both cases so there's less repeated code
        if vcf_file_name.endswith('.gz'):
            input_stream = gzip.open(vcf_file_name, 'rb')
        else:
            input_stream = open(vcf_file_name, 'r')
        # Release the handle as soon as parsing is over (even on error) so
        # descriptors do not pile up when many input files are processed.
        # NOTE(review): assumes parse_vcf has fully consumed the stream by the
        # time it returns -- confirm against its definition.
        try:
            samples = l_bp.parse_vcf(input_stream, self.vcf_lines, self.vcf_headers, include_ref=self.include_ref)
        finally:
            input_stream.close()
        for sample in samples:
            self.vcf_headers.append("##SAMPLE=<ID=" + sample + ">\n")
            self.has_genotypes = True
        # The batch is counted in input files, not in VCF lines.
        counter += 1
        if counter > self.batchsize:
            self.vcf_lines.sort(key=l_bp.vcf_line_key)
            self.write_temp_file()
            counter = 0
    # no need to write the final batch to file

    # FIXME Replace this with a new VCF class with the headers all added
    self.write_header()

    # The last (unflushed) batch stays in memory and is merged with the
    # temp files as one more sorted input.
    self.vcf_lines.sort(key=l_bp.vcf_line_key)
    iterables = self.temp_files + [self.vcf_lines]
    self.output_handle.writelines(merge(*iterables))
    self.close_tempfiles()
def execute(self):
    """Sort the records of all input VCF files and write them to the output handle.

    Each name in ``self.vcf_file_names`` is wrapped in an ``InputStream`` and
    handed to ``l_bp.parse_vcf`` together with ``self.vcf_lines`` and
    ``self.vcf_headers``.  A ``##SAMPLE`` header line is appended once per
    distinct sample name, skipping the literal name ``'VARIOUS'``.  Whenever
    more than ``self.batchsize`` files have been read since the last flush,
    the lines collected so far are sorted and ``self.write_temp_file()`` is
    called.  Finally the header is written and the temp files plus the
    remaining in-memory lines are merged into ``self.output_handle``.
    """
    counter = 0
    # Avoid same sample lines -D
    # A set keeps the duplicate check O(1) per sample instead of scanning a list.
    seen_samples = set()
    for vcf_file_name in self.vcf_file_names:
        # NOTE(review): input_stream is never closed in this method; if
        # InputStream exposes close(), call it once parsing is done -- confirm
        # against its definition.
        input_stream = InputStream(vcf_file_name, self.tempdir)
        samples = l_bp.parse_vcf(input_stream, self.vcf_lines, self.vcf_headers, include_ref=self.include_ref)
        for sample in samples:
            # Every returned sample -- new, repeated or 'VARIOUS' -- flags
            # genotypes as present; only the header line is conditional.
            self.has_genotypes = True
            if sample == 'VARIOUS' or sample in seen_samples:
                continue
            seen_samples.add(sample)
            self.vcf_headers.append("##SAMPLE=<ID=" + sample + ">\n")
        # The batch is counted in input files, not in VCF lines.
        counter += 1
        if counter > self.batchsize:
            self.vcf_lines.sort(key=l_bp.vcf_line_key)
            self.write_temp_file()
            counter = 0
    # no need to write the final batch to file

    # FIXME Replace this with a new VCF class with the headers all added
    self.write_header()

    # The last (unflushed) batch stays in memory and is merged with the
    # temp files as one more sorted input.
    self.vcf_lines.sort(key=l_bp.vcf_line_key)
    iterables = self.temp_files + [self.vcf_lines]
    self.output_handle.writelines(merge(*iterables))
    self.close_tempfiles()
def execute(self):
    """Sort the records of all input VCF files and write them to standard output.

    Every name in ``self.vcf_file_names`` is passed to ``l_bp.parse_vcf``
    along with ``self.vcf_lines`` and ``self.vcf_headers``; one ``##SAMPLE``
    header line is appended per sample name the parser returns.  Once more
    than ``self.batchsize`` files have been read since the last flush, the
    collected lines are sorted and ``self.write_temp_file()`` is invoked.
    At the end the header is written and the temp files are merged with the
    lines still held in memory.
    """
    files_in_batch = 0
    for path in self.vcf_file_names:
        sample_names = l_bp.parse_vcf(path, self.vcf_lines, self.vcf_headers)
        self.vcf_headers.extend(
            "##SAMPLE=<ID=" + name + ">\n" for name in sample_names
        )

        # Batches are measured in input files rather than in VCF lines.
        files_in_batch += 1
        if files_in_batch > self.batchsize:
            self.vcf_lines.sort(key=l_bp.vcf_line_key)
            self.write_temp_file()
            files_in_batch = 0

    # The final batch is deliberately kept in memory instead of being
    # written to a temp file; it joins the merge below directly.
    self.write_header()
    self.vcf_lines.sort(key=l_bp.vcf_line_key)
    sorted_sources = self.temp_files + [self.vcf_lines]
    sys.stdout.writelines(merge(*sorted_sources))
def execute(self):
    """Sort the records of all input VCF files and write them to the output handle.

    Every name in ``self.vcf_file_names`` is passed to ``l_bp.parse_vcf``
    along with ``self.vcf_lines`` and ``self.vcf_headers``; one ``##SAMPLE``
    header line is appended per sample name the parser returns.  Once more
    than ``self.batchsize`` files have been read since the last flush, the
    collected lines are sorted and ``self.write_temp_file()`` is invoked.
    At the end the header is written, the temp files are merged with the
    lines still held in memory into ``self.output_handle``, and
    ``self.close_tempfiles()`` is called.
    """
    pending_files = 0
    for source_name in self.vcf_file_names:
        parsed_samples = l_bp.parse_vcf(source_name, self.vcf_lines, self.vcf_headers)
        self.vcf_headers.extend(
            "##SAMPLE=<ID=" + sample_id + ">\n" for sample_id in parsed_samples
        )

        # Batches are measured in input files rather than in VCF lines.
        pending_files += 1
        if pending_files > self.batchsize:
            self.vcf_lines.sort(key=l_bp.vcf_line_key)
            self.write_temp_file()
            pending_files = 0

    # The final batch is deliberately kept in memory instead of being
    # written to a temp file; it joins the merge below directly.
    self.write_header()
    self.vcf_lines.sort(key=l_bp.vcf_line_key)
    merged_lines = merge(*(self.temp_files + [self.vcf_lines]))
    self.output_handle.writelines(merged_lines)
    self.close_tempfiles()