Example #1
 def count_reads(self):
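     # Count reads in the first two local output files, expanding duplicate
     # clusters back to their full sizes via the loaded cluster-size mapping.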
     self.should_count_reads = True
     self.counts_dict[self.name] = reads_in_group(
         file_group=self.output_files_local()[0:2],
         cluster_sizes=load_duplicate_cluster_sizes(
             self.input_cluster_sizes_path()),
         cluster_key=lambda x: x)
Example #2
 def _count_reads_work(self, cluster_key, counter_name, fasta_files):
     # Count reads including duplicates (expanding cd-hit-dup clusters).
     self.should_count_reads = True
     self.counts_dict[counter_name] = count.reads_in_group(
         file_group=fasta_files,
         cluster_sizes=load_cdhit_cluster_sizes(self.input_cluster_sizes_path()),
         cluster_key=cluster_key)
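Examples 1 and 2 pass a cluster-sizes mapping plus a cluster_key so that clusters collapsed by cd-hit-dup are expanded back to the number of reads they represent. The sketch below only illustrates that idea under assumed behavior; it is not the actual idseq_dag.util.count.reads_in_group implementation, and the helper name is hypothetical.

    def reads_in_group_sketch(file_group, cluster_sizes=None, cluster_key=None):
        # Count FASTA reads across a group of files; when cluster_sizes is given,
        # each read contributes the size of its duplicate cluster instead of 1.
        total = 0
        for path in file_group:
            with open(path) as fasta:
                for line in fasta:
                    if not line.startswith(">"):
                        continue  # only header lines mark reads
                    read_id = line[1:].strip()
                    if cluster_sizes is None:
                        total += 1
                    else:
                        key = cluster_key(read_id) if cluster_key else read_id
                        total += cluster_sizes.get(key, 1)
        return total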
Example #3
 def count_reads(self):
     self.should_count_reads = True
     files_to_count = self.output_files_local()[0:2]
     read_count = count.reads_in_group(files_to_count)
     self.counts_dict[self.name] = read_count
     # If the read count is exactly equal to the maximum allowed number,
     # infer that subsampling occurred:
     max_read_count = len(
         files_to_count) * self.additional_attributes["max_fragments"]
     if read_count == max_read_count:
         self.counts_dict["subsampled"] = 1
Example #4
    def count_input_reads(input_files, result_dir_local, result_dir_s3, target_name, max_fragments=None):
        local_input_files = [os.path.join(result_dir_local, f) for f in input_files[0:2]]
        count_file_basename = "%s.count" % target_name
        local_count_file = "%s/%s" % (result_dir_local, count_file_basename)
        s3_count_file = "%s/%s" % (result_dir_s3, count_file_basename)

        read_count = count.reads_in_group(local_input_files, max_fragments=max_fragments)
        counts_dict = {target_name: read_count}
        if max_fragments and read_count == len(local_input_files) * max_fragments:
            # If the number of reads is exactly equal to the maximum we specified,
            # it means that the input has been truncated.
            counts_dict["truncated"] = read_count

        with open(local_count_file, 'w') as count_file:
            json.dump(counts_dict, count_file)
        idseq_dag.util.s3.upload_with_retries(local_count_file, s3_count_file)
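If Example 4's helper is exposed as a standalone or static method, a call might look like the following; the file names, directories, bucket, and target name are made-up placeholders, and the count JSON is written locally and uploaded to S3 as a side effect rather than returned.

    count_input_reads(
        input_files=["reads_R1.fasta", "reads_R2.fasta"],  # hypothetical inputs
        result_dir_local="/mnt/idseq/results",             # hypothetical local dir
        result_dir_s3="s3://example-bucket/results",       # hypothetical S3 prefix
        target_name="fastqs",                              # hypothetical target
        max_fragments=75_000_000)                          # hypothetical cap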
Example #5
 def count_reads(self):
     self.should_count_reads = True
     self.counts_dict[self.name] = count.reads_in_group(self.output_files_local()[0:2])
 def count_reads(self):
     self.should_count_reads = True
     # Here we intentionally count unique reads.
     self.counts_dict[self.name] = count.reads_in_group(
         self.output_files_local()[:-2])  # last two outputs are not fastas
 def count_reads(self):
     # count unidentified reads
     self.should_count_reads = True
     self.counts_dict["unidentified_fasta"] = count.reads_in_group([self.output_files_local()[1]])