Exemplo n.º 1
0
def make1csv_errfree(output_csv_filename, sim_args_tsv):
    """
    Puts all the collate_dnds, full population csv, expected dnds info into 1 csv for checking what causes inaccurate
    inferred dn/ds.
    Instead of putting read sequencing umberjack results in the csv, it puts umberjack results
    from the full population.  Ie the windows are made on the sequences from the full population instead of the ART reads.

    One row is written per row of each population's error-free window dnds tsv.  Columns ending in ".Act"
    are taken from that window (fasta, tree, dnds tsv); columns ending in ".Exp" are taken from the
    full population conservation csv and dnds tsv.

    :param output_csv_filename: path of the csv to write.  Opened in 'w' mode, so existing content is replaced.
    :param sim_args_tsv: passed unchanged to run_sliding_window_tree.parse_sim_args_tsv()
    :raises ValueError: if the full population dnds tsv and conservation csv do not both hold exactly
        popn_group.codonsites entries, or if a window codon site is absent from either of them
    :return: None
    """
    LOGGER.debug("Writing all collated inferred, expected dnds to " + output_csv_filename)
    cols = hyphy.hyphy_handler  # short alias for the HyPhy tsv column name constants
    with open(output_csv_filename, 'w') as fh_out:
        writer = csv.DictWriter(fh_out, fieldnames=["Window_Start",
                                                    "Window_End",
                                                    "CodonSite",
                                                    "File",
                                                    "Is_Break",  # whether the site is a recombinant breakpoint (start of new strand)
                                                    "BreakRatio.Act",  # sum across breakpoints (ratio of bases on either side of breakpoint)
                                                    "Reads.Act",  # max read depth for entire slice
                                                    "UnambigCodonRate.Act",  # Total unambiguous codon (depth) at the codon site / max read depth for entire slice
                                                    "AADepth.Act",  # Total codons that code for only 1 amino acid at the codon site
                                                    "PopSize.Act",  # Population size
                                                    "ConserveCodon.Act",
                                                    "EntropyCodon.Act",  # Excludes codons with N's and gaps
                                                    "UnknownPerCodon.Act",  # Average N or gaps per codon at this site
                                                    "ErrPerCodon.Act",  # Average erroneous bases per codon at this site
                                                    "N.Act", "S.Act",
                                                    "EN.Act", "ES.Act",
                                                    "dNdS.Act",
                                                    "dN_minus_dS.Act",
                                                    "TreeLen.Act",  # length of window tree in nucleotide subs/site
                                                    "TreeDepth.Act",  # depth of longest branch in nucleotide subs/site
                                                    "Polytomy.Act",
                                                    # distance from actual to expected tree in Robinson Foulds-branch lengths /reads
                                                    "TreeDistPerRead.Act",
                                                    "ConserveCodon.Exp",
                                                    "EntropyCodon.Exp",
                                                    "N.Exp", "S.Exp",
                                                    "EN.Exp", "ES.Exp",
                                                    "dNdS.Exp",
                                                    "dN_minus_dS.Exp"
                                                    ])

        writer.writeheader()

        # Only the umberjack group -> population groups mapping is used here.
        _popn_groups, umberjack_group_to_args = run_sliding_window_tree.parse_sim_args_tsv(sim_args_tsv)
        for popn_groups_per_ugroup in umberjack_group_to_args.values():
            for popn_group in popn_groups_per_ugroup:
                # /home/thuy/gitrepo/Umberjack_Benchmark/simulations/data/simdatasetname
                sim_popn_name = popn_group.dataset
                sim_data = SimData(popn_group.config_file)
                sim_data_dir = sim_data.sim_data_dir

                # /home/thuy/gitrepo/Umberjack_Benchmark/simulations/data/simdatasetname/subs/simdatasetname.dnds.tsv
                full_popn_dnds_tsv = sim_data_dir + os.sep + "subs" + os.sep + sim_popn_name + ".dnds.tsv"
                # /home/thuy/gitrepo/Umberjack_Benchmark/simulations/data/simdatasetname/fullpopn/simdatasetname_TRUE.conserve.csv
                full_popn_conserve_csv = sim_data_dir + os.sep + "fullpopn" + os.sep + sim_popn_name + "_TRUE.conserve.csv"

                # Instead of using umberjack window fasta made from reads, we use window made from error free ART reads with perfect alignment
                # The single window spans the whole genome in 1-based nucleotide coordinates.
                window_start = 1
                window_end = NUCSITES
                umberjack_output_prefix = (DATASET_OUT_DIR + os.sep + sim_popn_name + os.sep + "errfree" + os.sep +
                                           sim_popn_name + ".repro.errfree.{}_{}".format(window_start, window_end))
                window_fasta = umberjack_output_prefix + ".fasta"
                window_treefile = umberjack_output_prefix + ".nwk"
                window_dnds_tsv = umberjack_output_prefix + ".dnds.tsv"

                LOGGER.debug("Merge sim_name=" + sim_popn_name + " full popn window dnds tsv =" + window_dnds_tsv)

                total_indiv = popn_group.indiv
                total_codon_sites = popn_group.codonsites

                # CodonSite	ConserveCodon	Entropy	NucDepth	CodonDepth
                # Both tables are looked up below by 1-based codon site
                # (presumably read_codon_csv() normalises the base-0 "Site" field -- confirm in collect_training).
                codonsite_2_full_cons = collect_training.read_codon_csv(csv_file=full_popn_conserve_csv,
                                                                        codon_site_field="CodonSite", is_base0=False)

                # Site	Observed S Changes	Observed NS Changes	E[S Sites]	E[NS Sites]	dS	dN	dN-dS	Scaled dN-dS
                codonsite_2_full_dnds = collect_training.read_codon_csv(csv_file=full_popn_dnds_tsv,
                                                                        codon_site_field="Site", is_base0=True,
                                                                        delimiter="\t")

                if (len(codonsite_2_full_dnds.keys()) != len(codonsite_2_full_cons.keys()) or
                        len(codonsite_2_full_dnds.keys()) != total_codon_sites or
                        len(codonsite_2_full_cons.keys()) != total_codon_sites):
                    raise ValueError("full population dnds does not have same number of codon sites as conservation:",
                                     full_popn_dnds_tsv, ", ", full_popn_conserve_csv)

                aln = Utility.Consensus()
                aln.parse(msa_fasta_filename=window_fasta)
                window_reads = aln.get_total_seqs()
                window_tree_dist = TestTopology.calc_window_tree_dist(sim_data=sim_data,
                                                                      window_fasta=window_fasta,
                                                                      window_treefile=window_treefile,
                                                                      win_start=window_start, win_end=window_end)
                full_popn_breaks = sim_data.get_recombo_breaks()
                # If there are no recombination breaks, full_popn_breaks still contains the full genome as a contiguous section
                has_recombo_breaks = len(full_popn_breaks) > 1

                break_ratio = collect_training.get_break_ratio(sim_data=sim_data, win_start=window_start, win_end=window_end)

                polytomy_brlen_thresh = 1.0/(3 * total_codon_sites)  # branch length threshold below which node is considered polytomy
                # FIX: tree statistics must be read from the window tree file, not from the window fasta.
                window_treelen, window_treedepth, total_polytomies = collect_training.get_tree_len_depth(
                    window_treefile, polytomy_brlen_thresh=polytomy_brlen_thresh)

                with open(window_dnds_tsv, 'rU') as fh_actual:
                    reader_act = csv.DictReader(fh_actual, delimiter="\t")
                    for row_act in reader_act:
                        # "Site" is the 0-based codon offset within the window.
                        act_codonsite_offset_base0 = int(row_act["Site"])
                        # FIX: window_start is a 1-based *nucleotide* position, so convert it to a codon offset
                        # before adding it to a codon index (no change for the hard-coded window_start of 1).
                        act_codonsite_base0 = act_codonsite_offset_base0 + (window_start - 1) // 3
                        # FIX: offset of the codon's first base *within the window*, derived from the window-relative codon offset.
                        act_nucsite_offset_base0 = act_codonsite_offset_base0 * 3
                        codonsite_base1 = act_codonsite_base0 + 1

                        unambig_codon_depth = aln.get_codon_depth(codon_pos_0based=act_codonsite_offset_base0,
                                                                  is_count_ambig=False, is_count_gaps=False,
                                                                  is_count_pad=False)
                        # FIX: count gaps, N's and pads over all 3 bases of the codon, as the column is documented
                        # ("Average N or gaps per codon"); previously only the first base was counted.
                        unknown_bases = 0
                        for codon_base in range(3):
                            nuc_pos_0based = act_nucsite_offset_base0 + codon_base
                            unknown_bases += (aln.get_gap_count(pos_0based=nuc_pos_0based) +
                                              aln.get_ambig_count(pos_0based=nuc_pos_0based) +
                                              aln.get_pad_count(pos_0based=nuc_pos_0based))

                        outrow = dict()
                        outrow["Window_Start"] = window_start
                        outrow["Window_End"] = window_end
                        outrow["CodonSite"] = codonsite_base1
                        outrow["File"] = "ErrFree_" + sim_data.name
                        outrow["Reads.Act"] = window_reads
                        outrow["UnambigCodonRate.Act"] = float(unambig_codon_depth)/window_reads
                        outrow["AADepth.Act"] = aln.get_unambig_codon2aa_depth(codon_pos_0based=act_codonsite_offset_base0)
                        outrow["PopSize.Act"] = total_indiv
                        outrow["ConserveCodon.Act"] = aln.get_codon_conserve(codon_pos_0based=act_codonsite_offset_base0,
                                                                             is_count_ambig=False, is_count_gaps=False,
                                                                             is_count_pad=False)
                        outrow["EntropyCodon.Act"] = aln.get_codon_shannon_entropy(codon_pos_0based=act_codonsite_offset_base0,
                                                                                   is_count_ambig=False, is_count_gaps=False,
                                                                                   is_count_pad=False)
                        outrow["UnknownPerCodon.Act"] = float(unknown_bases) / window_reads
                        outrow["ErrPerCodon.Act"] = 0  # the window is built from error-free sequences

                        # If it never made it past FastTree into hyphy, then the substitutions will be empty string.
                        # In that case the dnds columns are left out of outrow and DictWriter writes them empty.
                        if row_act[cols.HYPHY_TSV_N_COL] and row_act[cols.HYPHY_TSV_S_COL]:
                            outrow["N.Act"] = float(row_act[cols.HYPHY_TSV_N_COL])
                            outrow["S.Act"] = float(row_act[cols.HYPHY_TSV_S_COL])
                            outrow["EN.Act"] = float(row_act[cols.HYPHY_TSV_EXP_N_COL])
                            outrow["ES.Act"] = float(row_act[cols.HYPHY_TSV_EXP_S_COL])
                            # dN/dS is only defined when there are synonymous substitutions
                            if row_act["dS"] and outrow["S.Act"] != 0:
                                outrow["dNdS.Act"] = (float(row_act[cols.HYPHY_TSV_DN_COL]) /
                                                      float(row_act[cols.HYPHY_TSV_DS_COL]))
                            outrow["dN_minus_dS.Act"] = row_act[cols.HYPHY_TSV_SCALED_DN_MINUS_DS_COL]
                        outrow["TreeLen.Act"] = window_treelen
                        outrow["TreeDepth.Act"] = window_treedepth
                        outrow["TreeDistPerRead.Act"] = float(window_tree_dist)/window_reads

                        # A site is a breakpoint when a recombinant strand starts on the codon's first base.
                        # Don't consider first position as breakpoint
                        nuc_pos_wrt_ref_base1 = window_start + act_nucsite_offset_base0
                        is_break = has_recombo_breaks and any(
                            nuc_pos_wrt_ref_base1 == nuc_strand_start_wrt_ref_base1 > 1
                            for nuc_strand_start_wrt_ref_base1, _nuc_strand_end_wrt_ref_base1 in full_popn_breaks)
                        outrow["Is_Break"] = 1 if is_break else 0

                        outrow["BreakRatio.Act"] = break_ratio
                        outrow["Polytomy.Act"] = total_polytomies

                        full_cons_row = codonsite_2_full_cons.get(codonsite_base1)
                        if not full_cons_row:
                            raise ValueError("Missing codon site" + str(codonsite_base1) + " in " + full_popn_conserve_csv)
                        outrow["ConserveCodon.Exp"] = full_cons_row["ConserveCodon"]
                        outrow["EntropyCodon.Exp"] = full_cons_row["EntropyCodon"]

                        full_dnds_row = codonsite_2_full_dnds.get(codonsite_base1)
                        if not full_dnds_row:
                            # FIX: the lookup that failed is in the full population dnds tsv, so report that file.
                            raise ValueError("Missing codon site" + str(codonsite_base1) + " in " + full_popn_dnds_tsv)

                        outrow["N.Exp"] = full_dnds_row[cols.HYPHY_TSV_N_COL]
                        outrow["S.Exp"] = full_dnds_row[cols.HYPHY_TSV_S_COL]
                        outrow["EN.Exp"] = full_dnds_row[cols.HYPHY_TSV_EXP_N_COL]
                        outrow["ES.Exp"] = full_dnds_row[cols.HYPHY_TSV_EXP_S_COL]

                        # dN/dS is only defined when there are synonymous substitutions
                        if full_dnds_row[cols.HYPHY_TSV_S_COL] and float(full_dnds_row[cols.HYPHY_TSV_S_COL]) != 0:
                            outrow["dNdS.Exp"] = (float(full_dnds_row[cols.HYPHY_TSV_DN_COL]) /
                                                  float(full_dnds_row[cols.HYPHY_TSV_DS_COL]))

                        outrow["dN_minus_dS.Exp"] = full_dnds_row[cols.HYPHY_TSV_SCALED_DN_MINUS_DS_COL]

                        writer.writerow(outrow)
Exemplo n.º 2
0
def collect_dnds(output_dir,
                 output_csv_filename,
                 sim_data_config,
                 comments=None):
    """
    Collects everything related to dnds into 1 table.  Does not do any aggregation of values.  Useful for debugging.

    Every window fasta matching "<output_dir>/*.*_*.fasta" (except *.anc.fasta, *.msa.fasta and any path
    containing "expected") contributes one row per complete codon in the window alignment.
    Tree columns are only filled when the sibling "<prefix>.nwk" exists.  The dN/dS columns and
    "ResolvedPerSub" are only filled when the sibling "<prefix>.dnds.tsv" exists and is non-empty.

    :param output_dir: directory that is searched for window fasta files
    :param output_csv_filename: path of the csv to write.  Opened in 'w' mode, so existing content is replaced.
    :param sim_data_config: passed unchanged to SimData()
    :param comments: optional text written verbatim to the output before the csv header
    :raises ValueError: if the dnds tsv rows are not numbered 0, 1, 2... in step with the window codons,
        if the dnds tsv has fewer or more rows than the window has codons,
        or if the substitution totals from the subst tsv and the dnds tsv differ by more than 1e-2
    :return: None
    """
    LOGGER.debug("Collect dnds for " + output_csv_filename)
    with open(output_csv_filename, 'w') as fh_out:

        sim_data = SimData(sim_data_config)
        full_popn_fasta = sim_data.get_fasta()
        full_popn_breaks = sim_data.get_recombo_breaks()
        # If there are no recombination breaks, full_popn_breaks still contains the full genome as a contiguous section
        has_recombo_breaks = len(full_popn_breaks) > 1
        # The full population alignment is identical for every window, so it is parsed once,
        # on the first window that needs it, instead of once per window file.
        full_popn_aln = None

        if comments:
            fh_out.write(comments)

        writer = csv.DictWriter(
            fh_out,
            fieldnames=[
                "Window_Start",
                "Window_End",
                "Reads",  # Max read depth for the window (not necessary for the codon site)
                "CodonSite",  # 1-based codon site
                "CodonDepth",  # Total unambiguous codon (depth) at the codon site
                "AADepth",  # Total depth of codons that code unambiguously for 1 AA.
                # "ConserveAllCodon",  # Average per-base fraction of conservation across the codon.  Includes N's and gaps.
                # "EntropyAllCodon",  # Average per-base metric entropy across the codon.  Includes N's and gaps.
                "ConserveCodon",  # Average per-base fraction of conservation across the codon.  Excludes N's and gaps
                "EntropyCodon",  # Average per-base fraction of entropy across the codon.  Excludes N's and gaps
                "N",  # Observed Nonsynonymous substitutions
                "S",  # Observed Synonymous substitutions
                "EN",  # Expected Nonsynonymous substitutions
                "ES",  # Expected Synonymous substitutions
                "dN",
                "dS",
                "dN_minus_dS",  # dN-dS scaled by the tree length
                "unscaled_dN_minus_dS",  # dN-dS
                "Ambig",  # N nucleotide
                "Pad",  # left or right pad gap
                "Gap",  # internal gap between true bases on both sides
                "Err",  # Nucleotide errors within the codon
                "Err_N",  # nonsynonymous AA change due to sequence error
                "Err_S",  # synonymous AA change due to sequence error
                "Ambig_N",  # Ambiguous base changes the AA.  Should be always 0
                "Ambig_S",  # ambiguous base does not change the AA.
                "TreeLen",  # Tree length
                "TreeDepth",  # deepest tip to root distance
                "TreeDist",  # distance from actual to expected tree
                "Is_Break",  # Whether a strand switch starts on this codon site
                "BreakRatio",  # sum across window breakpoints (ratio of bases on either side of breakpoint)
                "Polytomy",  # total polytomies in tree
                "P_SameCodonFreq",  # log10 probability that sliced codon frequency distro is same as full population distro
                # Total substitutions that were resolved vs observed
                "ResolvedPerSub"
            ])
        writer.writeheader()
        for slice_fasta_filename in glob.glob(output_dir + os.sep + "*.*_*.fasta"):

            # don't use hyphy ancestral fasta  or fullgene msa fasta or expected files
            if (slice_fasta_filename.endswith((".anc.fasta", ".msa.fasta"))
                    or "expected" in slice_fasta_filename):
                continue

            # *.{start bp}_{end bp}.fasta filenames use 1-based nucleotide position numbering
            slice_fasta_fileprefix = slice_fasta_filename.split('.fasta')[0]

            win_nuc_range = slice_fasta_fileprefix.split('.')[-1]
            # the full genome msa.fasta file won't have a window range
            if win_nuc_range.find("_") <= 0:
                continue
            # Window starts/ends at these 1-based nucleotide positions with respect to the reference
            win_start_nuc_pos_1based_wrt_ref, win_end_nuc_pos_1based_wrt_ref = [
                int(x) for x in win_nuc_range.split('_')
            ]
            # Window starts at this 1-based codon position with respect to the reference.
            # Explicit floor division: the result is used as an integer codon index.
            win_start_codon_1based_wrt_ref = win_start_nuc_pos_1based_wrt_ref // Utility.NUC_PER_CODON + 1

            break_ratio = get_break_ratio(
                sim_data=sim_data,
                win_start=win_start_nuc_pos_1based_wrt_ref,
                win_end=win_end_nuc_pos_1based_wrt_ref)

            slice_aln = Utility.Consensus()
            slice_aln.parse(slice_fasta_filename)
            slice_reads = slice_aln.get_total_seqs()
            # if the last codon doesn't have enuf chars, then hyphy ignores it
            codon_width = slice_aln.get_alignment_len() // Utility.NUC_PER_CODON

            tree_len = None
            tree_depth = None
            tree_dist = None
            total_polytomies = None
            slice_tree_filename = slice_fasta_fileprefix + ".nwk"
            if os.path.exists(slice_tree_filename):
                # NB:  FastTree tree length in nucleotide substitutions / site.
                # HyPhy converts trees to codon substitution/site to count codon substitutions along phylogeny
                # branch length threshold below which node is considered polytomy
                polytomy_brlen_thresh = 1.0 / (3 * codon_width)
                tree_len, tree_depth, total_polytomies = get_tree_len_depth(
                    slice_tree_filename,
                    polytomy_brlen_thresh=polytomy_brlen_thresh)

                # If there is recombination, there may be multiple trees.
                # Use the full population tree corresponding to slice portion of the genome.
                tree_dist = TestTopology.calc_window_tree_dist(
                    sim_data=sim_data,
                    window_fasta=slice_fasta_filename,
                    window_treefile=slice_tree_filename,
                    win_start=win_start_nuc_pos_1based_wrt_ref,
                    win_end=win_end_nuc_pos_1based_wrt_ref)

            # Per-codon error tallies, each indexed below by 0-based codon offset within the window
            (seq_err, err_aa_change, err_aa_nochange, ambig_aa_change,
             ambig_aa_nochange) = error_by_codonpos(
                 slice_fasta_filename, win_start_nuc_pos_1based_wrt_ref,
                 full_popn_fasta)

            if full_popn_aln is None:
                full_popn_aln = Utility.Consensus()
                full_popn_aln.parse(full_popn_fasta)

            dnds_tsv_filename = slice_fasta_fileprefix + ".dnds.tsv"
            subs_tsv_filename = slice_fasta_fileprefix + ".subst.tsv"

            fh_dnds_tsv = None
            reader = None
            site_to_subcounts = dict()
            try:
                # The dnds columns are only available if hyphy produced a non-empty dnds tsv for this window
                if os.path.exists(dnds_tsv_filename) and os.path.getsize(dnds_tsv_filename):
                    fh_dnds_tsv = open(dnds_tsv_filename, 'rU')
                    site_to_subcounts = count_resolved(subs_tsv_filename)
                    reader = csv.DictReader(fh_dnds_tsv, delimiter='\t')

                for codonoffset_0based in range(codon_width):
                    nucoffset_0based = codonoffset_0based * Utility.NUC_PER_CODON
                    # 0-based positions, within the window, of the bases that make up this codon
                    codon_nuc_offsets = range(nucoffset_0based, nucoffset_0based + 3)

                    outrow = dict()
                    outrow["Window_Start"] = win_start_nuc_pos_1based_wrt_ref
                    outrow["Window_End"] = win_end_nuc_pos_1based_wrt_ref
                    outrow["Reads"] = slice_reads
                    outrow["CodonSite"] = win_start_codon_1based_wrt_ref + codonoffset_0based
                    outrow["CodonDepth"] = slice_aln.get_codon_depth(
                        codon_pos_0based=codonoffset_0based,
                        is_count_ambig=False,
                        is_count_gaps=False,
                        is_count_pad=False)
                    outrow["AADepth"] = slice_aln.get_unambig_codon2aa_depth(
                        codon_pos_0based=codonoffset_0based)
                    outrow["ConserveCodon"] = slice_aln.get_codon_conserve(
                        codonoffset_0based,
                        is_count_ambig=False,
                        is_count_gaps=False,
                        is_count_pad=False)
                    outrow["EntropyCodon"] = slice_aln.get_codon_shannon_entropy(
                        codonoffset_0based,
                        is_count_ambig=False,
                        is_count_gaps=False,
                        is_count_pad=False)
                    # Ambig / Pad / Gap are totals over the 3 bases of the codon
                    outrow["Ambig"] = sum(slice_aln.get_ambig_count(pos_0based=pos)
                                          for pos in codon_nuc_offsets)
                    outrow["Pad"] = sum(slice_aln.get_pad_count(pos_0based=pos)
                                        for pos in codon_nuc_offsets)
                    outrow["Gap"] = sum(slice_aln.get_gap_count(pos_0based=pos)
                                        for pos in codon_nuc_offsets)
                    outrow["Err"] = seq_err[codonoffset_0based]
                    outrow["Err_N"] = err_aa_change[codonoffset_0based]
                    outrow["Err_S"] = err_aa_nochange[codonoffset_0based]
                    outrow["Ambig_N"] = ambig_aa_change[codonoffset_0based]
                    outrow["Ambig_S"] = ambig_aa_nochange[codonoffset_0based]
                    outrow["TreeLen"] = tree_len
                    outrow["TreeDepth"] = tree_depth
                    outrow["TreeDist"] = tree_dist

                    # A site is a breakpoint when a recombinant strand starts on the codon's first base.
                    # Don't consider first position as breakpoint
                    nuc_pos_wrt_ref_base1 = win_start_nuc_pos_1based_wrt_ref + nucoffset_0based
                    is_break = has_recombo_breaks and any(
                        nuc_pos_wrt_ref_base1 == nuc_strand_start_wrt_ref_base1 > 1
                        for nuc_strand_start_wrt_ref_base1, _nuc_strand_end_wrt_ref_base1 in full_popn_breaks)
                    outrow["Is_Break"] = 1 if is_break else 0

                    outrow["BreakRatio"] = break_ratio
                    outrow["Polytomy"] = total_polytomies

                    #  log-likelihood ratio test that codon count distributions are similar between window and full population
                    full_popn_codon_freq = full_popn_aln.get_codon_freq(
                        codon_pos_0based=win_start_codon_1based_wrt_ref + codonoffset_0based - 1,
                        is_count_pad=False,
                        is_count_gaps=False,
                        is_count_ambig=False)
                    slice_codon_freq = slice_aln.get_codon_freq(
                        codon_pos_0based=codonoffset_0based,
                        is_count_pad=False,
                        is_count_gaps=False,
                        is_count_ambig=False)

                    outrow["P_SameCodonFreq"] = cmp_freq_distro(slice_codon_freq,
                                                                full_popn_codon_freq,
                                                                is_scale=False)

                    if reader:
                        resolved_ns, resolved_s, obs_ns, obs_s = site_to_subcounts[codonoffset_0based]
                        total_subs = resolved_ns + resolved_s + obs_ns + obs_s
                        if total_subs:
                            outrow["ResolvedPerSub"] = (resolved_ns + resolved_s) / float(total_subs)
                        else:
                            outrow["ResolvedPerSub"] = 0

                        # Every codon site is a row in the *.dnds.tsv file
                        try:
                            dnds_info = next(reader)
                        except StopIteration:
                            # A truncated tsv is reported like the other consistency failures
                            # instead of leaking a bare StopIteration to the caller.
                            raise ValueError(
                                "dnds TSV has fewer codons than expected " +
                                dnds_tsv_filename)
                        if codonoffset_0based != int(dnds_info["Site"]):
                            # dnds tsv specified the codon site in 0-based coordinates in Site field wrt Slice
                            raise ValueError("Inconsistent site numbering " +
                                             str(codonoffset_0based) + " in " +
                                             dnds_tsv_filename)

                        outrow["N"] = dnds_info[hyphy_handler.HYPHY_TSV_N_COL]
                        outrow["S"] = dnds_info[hyphy_handler.HYPHY_TSV_S_COL]
                        outrow["ES"] = dnds_info[hyphy_handler.HYPHY_TSV_EXP_S_COL]
                        outrow["EN"] = dnds_info[hyphy_handler.HYPHY_TSV_EXP_N_COL]
                        outrow["dN"] = dnds_info[hyphy_handler.HYPHY_TSV_DN_COL]
                        outrow["dS"] = dnds_info[hyphy_handler.HYPHY_TSV_DS_COL]
                        outrow["dN_minus_dS"] = dnds_info[hyphy_handler.HYPHY_TSV_SCALED_DN_MINUS_DS_COL]
                        outrow["unscaled_dN_minus_dS"] = dnds_info[hyphy_handler.HYPHY_TSV_DN_MINUS_DS_COL]

                        # Cross-check the substitution total from the subst tsv against N + S from the dnds tsv
                        dnds_total_subs = (float(dnds_info[hyphy_handler.HYPHY_TSV_N_COL]) +
                                           float(dnds_info[hyphy_handler.HYPHY_TSV_S_COL]))
                        if abs(total_subs - dnds_total_subs) > 1e-2:
                            raise ValueError(
                                "Inconsitent total subs at 0-based site " +
                                str(codonoffset_0based) + " wrt  " +
                                subs_tsv_filename + " and " +
                                dnds_tsv_filename + " " + str(total_subs) +
                                " " + str(dnds_total_subs))

                    writer.writerow(outrow)

                if reader:
                    # We want the reader to have no more rows
                    extra_dnds_info = next(reader, None)
                    if extra_dnds_info:
                        raise ValueError(
                            "dnds TSV has more codons than expected " +
                            dnds_tsv_filename)

            finally:
                if fh_dnds_tsv and not fh_dnds_tsv.closed:
                    fh_dnds_tsv.close()