Пример #1
0
def export_frequency_json(process, prefix, indent):
    """Gather every frequency trajectory of `process` into one JSON file.

    Keys of the resulting dictionary:
      * ``pivots`` - the rounded pivots
      * ``counts`` - the mutation frequency counts (as lists)
      * ``region_protein:159F`` - one entry per mutation
      * ``region_clade:123`` - one entry per clade of the tree frequencies
      * ``region_name`` - one entry per named clade in ``clades_to_nodes``

    The file ``<prefix>_frequencies.json`` is written (with `indent`) only if
    `process` has tree and/or mutation frequencies. Without pivots nothing is
    exported and a notification is logged instead.
    """
    if not hasattr(process, 'pivots'):
        process.log.notify("Cannot export frequencies - pivots do not exist")
        return

    digits = 6  # precision handed to round_freqs for every trajectory
    output = {'pivots': round_freqs(process.pivots, digits)}

    # mutation frequencies plus the counts they are based on
    if hasattr(process, 'mutation_frequencies'):
        output['counts'] = dict(
            (key, list(values))
            for key, values in process.mutation_frequency_counts.iteritems())
        for (region, gene), per_gene in process.mutation_frequencies.iteritems():
            for mut, trajectory in per_gene.iteritems():
                # positions are stored zero-based, labels are one-based
                key = region + "_" + gene + ':' + str(mut[0] + 1) + mut[1]
                output[key] = round_freqs(trajectory, digits)

    # clade frequencies estimated on the tree, keyed by clade number
    if hasattr(process, 'tree_frequencies'):
        for region in process.tree_frequencies:
            for clade, trajectory in process.tree_frequencies[region].iteritems():
                output[region + '_clade:' + str(clade)] = round_freqs(trajectory, digits)

    # the same trajectories once more under the names of annotated clades
    if hasattr(process, 'clades_to_nodes') and hasattr(process, 'tree_frequencies'):
        for region in process.tree_frequencies:
            regional = process.tree_frequencies[region]
            for name, node in process.clades_to_nodes.iteritems():
                output[region + '_' + str(name)] = round_freqs(regional[node.clade], digits)

    # a file is only produced when there is something besides the pivots
    if hasattr(process, 'tree_frequencies') or hasattr(process, 'mutation_frequencies'):
        write_json(output, prefix + '_frequencies.json', indent=indent)
Пример #2
0
    def export(self, prefix='web/data/', extra_attr = []):
        '''
        Write the json files used by the browser visualization.

        In order: the column-by-column alignment diversity (entropy.json),
        the tree (via self.tree.export), one combined frequencies.json and
        finally the titer data through self.HI_export. All file names are
        appended directly to `prefix`. `extra_attr` lists node attributes to
        export in addition to the fixed set used below.
        '''
        # node attributes that are always exported with the tree
        fixed_attr = ["subtype", "country", "region", "nuc_muts",
                      "ep", "ne", "rb", "aa_muts","lab", "accession","isolate_id"]

        self.seqs.export_diversity(prefix+'entropy.json')
        self.tree.export(path=prefix, extra_attr=extra_attr + fixed_attr)

        # four decimals are plenty for display and keep the file small
        def to_4dp(values):
            return [round(v, 4) for v in values]

        # one dictionary for all frequency estimates; keys look like
        # region_protein:159F for mutations and region_clade:123 for clades
        frequencies = {'pivots': to_4dp(self.pivots)}
        frequencies['counts'] = dict(
            (key, list(tips)) for key, tips in self.tip_count.iteritems())

        for (region, gene), by_mutation in self.frequencies.iteritems():
            for mut, trajectory in by_mutation.iteritems():
                # stored positions are zero-based, labels are one-based
                key = region+"_"+gene+':'+str(mut[0]+1)+mut[1]
                frequencies[key] = to_4dp(trajectory)

        for (region, clade), trajectory in self.tree_frequencies.iteritems():
            frequencies[region+'_clade:'+str(clade)] = to_4dp(trajectory)

        write_json(frequencies, prefix+'frequencies.json', indent=None)

        self.HI_export(prefix)
Пример #3
0
def titer_export(process):
    """Export raw titers, the tree titer model and its validation results.

    All files are written next to each other using the prefix
    ``<auspice output dir>/<info prefix>_``:

      * ``titers.json`` - result of ``titer_tree.compile_titers()``
      * ``tree_model.json`` - potencies, virus effects (avidity) and the
        ``dTiter`` of every clade whose dTiter exceeds 1e-6
      * ``predicted_titers.csv`` / ``titer_model_performance.csv`` - only if
        the titer tree carries ``cross_validation`` or ``validation`` results

    If `process` has no ``titer_tree`` a message is printed and nothing is
    written.
    """
    from base.io_util import write_json
    from itertools import chain
    import pandas as pd

    prefix = process.config["output"]["auspice"]+'/'+process.info["prefix"]+'_'

    if not hasattr(process, 'titer_tree'):
        print('Tree model not yet trained')
        return

    titer_tree = process.titer_tree

    # export the raw titers
    data = titer_tree.compile_titers()
    write_json(data, prefix+'titers.json', indent=1)

    # export the tree model
    tree_model = {'potency':titer_tree.compile_potencies(),
                  'avidity':titer_tree.compile_virus_effects(),
                  'dTiter':{n.clade:n.dTiter for n in process.tree.tree.find_clades() if n.dTiter>1e-6}}
    write_json(tree_model, prefix+'tree_model.json')

    # export model performance on test set
    if hasattr(titer_tree, 'cross_validation'):
        # popping 'values' removes the raw pairs from each iteration, so the
        # remaining entries can be turned into the performance table below;
        # the pairs are flattened to one list of (actual, predicted) tuples
        pairs = list(chain.from_iterable(
            [iteration.pop('values') for iteration in titer_tree.cross_validation]))
        performance = titer_tree.cross_validation  # list of dictionaries
    elif hasattr(titer_tree, 'validation'):
        # fixed: this used to test `process.titer_tree.hasattr`, which raised
        # AttributeError whenever no cross validation had been run
        pairs = titer_tree.validation.pop('values')
        performance = titer_tree.validation
    else:
        return

    # cast to data frames so both tables can easily be written as csv
    predicted_values = pd.DataFrame(pairs, columns=['actual', 'predicted'])
    model_performance = pd.DataFrame(performance)
    predicted_values.to_csv(prefix+'predicted_titers.csv', index=False)
    model_performance.to_csv(prefix+'titer_model_performance.csv', index=False)
Пример #4
0
def titer_export(process):
    """Write the titer data, the tree model and validation tables to disk.

    Output files share the prefix ``<auspice output dir>/<info prefix>_``:
    ``titers.json`` (compiled raw titers), ``tree_model.json`` (potencies,
    virus effects and the dTiter of all clades with dTiter > 1e-6) and, when
    the titer tree has ``cross_validation`` or ``validation`` results,
    ``predicted_titers.csv`` and ``titer_model_performance.csv``.

    Prints a message and writes nothing if `process` has no ``titer_tree``.
    """
    from base.io_util import write_json
    from itertools import chain
    import pandas as pd

    prefix = process.config["output"]["auspice"] + '/' + process.info[
        "prefix"] + '_'

    if hasattr(process, 'titer_tree'):
        # export the raw titers
        data = process.titer_tree.compile_titers()
        write_json(data, prefix + 'titers.json', indent=1)
        # export the tree model
        tree_model = {
            'potency': process.titer_tree.compile_potencies(),
            'avidity': process.titer_tree.compile_virus_effects(),
            'dTiter': {
                n.clade: n.dTiter
                for n in process.tree.tree.find_clades() if n.dTiter > 1e-6
            }
        }
        write_json(tree_model, prefix + 'tree_model.json')

        # export model performance on test set
        predicted_values = None
        if hasattr(process.titer_tree, 'cross_validation'):
            # 'values' is popped from every iteration so that what remains
            # forms the performance table; the popped lists are flattened to
            # one list of (actual, predicted) tuples
            predicted_values = list(
                chain.from_iterable([
                    iteration.pop('values')
                    for iteration in process.titer_tree.cross_validation
                ]))
            # list of dictionaries -> df
            model_performance = pd.DataFrame(
                process.titer_tree.cross_validation)
        elif hasattr(process.titer_tree, 'validation'):
            # fixed: the check was made on `process.titer_tree.hasattr`, an
            # attribute lookup that fails with AttributeError
            predicted_values = process.titer_tree.validation.pop('values')
            model_performance = pd.DataFrame(process.titer_tree.validation)

        if predicted_values is not None:
            # cast to df so we can easily write to csv
            predicted_values = pd.DataFrame(predicted_values,
                                            columns=['actual', 'predicted'])
            predicted_values.to_csv(prefix + 'predicted_titers.csv',
                                    index=False)
            model_performance.to_csv(prefix + 'titer_model_performance.csv',
                                     index=False)

    else:
        print('Tree model not yet trained')
Пример #5
0
def export_metadata_json(process, prefix, indent):
    """Assemble the metadata of `process` and write ``<prefix>_meta.json``.

    The metadata combines the number of tree tips, the author summary, the
    color options from the auspice config (extended by color maps from
    ``process.colors``), several pass-through config/info entries, the date
    of the export, the git commit (when the ``git`` module can be imported)
    and ``process.lat_longs``.

    `indent` is part of the signature but is not used by this function.
    """
    process.log.notify("Writing out metaprocess")

    meta_json = {
        # number of tip nodes in the tree
        "virus_count": sum(1 for _ in process.tree.tree.get_terminals()),
        "author_info": summarise_publications_from_tree(process.tree.tree),
    }

    auspice_cfg = process.config["auspice"]

    # merge color maps from the input JSONs into the configured color options
    # (the options dictionary of the config is updated in place)
    color_options = auspice_cfg["color_options"]
    if process.colors:
        for trait, color_map in process.colors.iteritems():
            if trait not in color_options:
                process.log.warn("{} in colors (input JSON) but not auspice/color_options. Ignoring".format(trait))
                continue
            color_options[trait]["color_map"] = color_map
    meta_json["color_options"] = color_options

    # optional config entries are copied only when present
    for optional_key in ("date_range", "analysisSlider"):
        if optional_key in auspice_cfg:
            meta_json[optional_key] = auspice_cfg[optional_key]

    meta_json["panels"] = auspice_cfg["panels"]
    # the X marker makes it possible to drop a leading zero of the day
    stamp = time.strftime("X%d %b %Y")
    meta_json["updated"] = stamp.replace('X0','X').replace('X','')
    meta_json["title"] = process.info["title"]
    meta_json["maintainer"] = process.info["maintainer"]
    meta_json["filters"] = process.info["auspice_filters"]
    meta_json["annotations"] = extract_annotations(process)

    # pass through flu specific information (if present)
    if "vaccine_choices" in process.info:
        meta_json["vaccine_choices"] = process.info["vaccine_choices"]

    ## frequency params are ignored for now until they are implemented in nextstrain/auspice

    if "defaults" in auspice_cfg:
        meta_json["defaults"] = auspice_cfg["defaults"]

    # record the current commit; fall back to "unknown" without the git module
    try:
        import git
        meta_json["commit"] = git.Repo(search_parent_directories=True).head.object.hexsha
    except ImportError:
        meta_json["commit"] = "unknown"

    meta_json["geo"] = process.lat_longs
    write_json(meta_json, prefix+'_meta.json')
Пример #6
0
def export_tip_frequency_json(process, prefix, indent):
    """Write the per-tip frequencies to ``<prefix>_tip-frequencies.json``.

    The JSON holds the rounded pivots plus, for every terminal node of the
    tree, its rounded "global" kde frequencies and a weight of 1.0, keyed by
    the node name. Requires both ``pivots`` and ``kde_frequencies`` on
    `process`; otherwise a notification is logged and nothing is written.
    """
    if not hasattr(process, 'pivots') or not hasattr(process, 'kde_frequencies'):
        process.log.notify("Cannot export tip frequencies - pivots and/or kde_frequencies do not exist")
        return

    digits = 6  # precision handed to round_freqs
    output = {'pivots': round_freqs(process.pivots, digits)}

    for tip in process.tree.tree.get_terminals():
        trajectory = round_freqs(process.kde_frequencies["global"][tip.clade], digits)
        output[tip.name] = {"frequencies": trajectory, "weight": 1.0}

    write_json(output, prefix+'_tip-frequencies.json', indent=indent)
Пример #7
0
def main(params):
    """Run the fitness model on the refined tree and write the tree back.

    Reads ``data/tree_refine.json``, builds a ``fitness_model`` with
    ``params['predictors']``, calls its ``predict`` with ``params['niter']``
    iterations and writes the tree to ``data/tree_fitness.json``.

    Returns the name of the written file.
    """
    import time
    from io_util import read_json
    from io_util import write_json
    from tree_util import json_to_dendropy, dendropy_to_json

    # parenthesized so the line is valid as a print statement (Python 2) and
    # as a call of the print function (Python 3); the output is the same
    print("--- Start fitness model optimization at " + time.strftime(
        "%H:%M:%S") + " ---")

    tree_fname = 'data/tree_refine.json'
    tree = json_to_dendropy(read_json(tree_fname))
    fm = fitness_model(tree, predictors=params['predictors'], verbose=1)
    fm.predict(niter=params['niter'])
    out_fname = "data/tree_fitness.json"
    write_json(dendropy_to_json(tree.root), out_fname)
    return out_fname
Пример #8
0
def HI_export(self):
    """Export the titers and the trained titer models as JSON files.

    Files are named ``<auspice output dir>/<info prefix>`` followed by
    ``_titers.json``, ``_titer_tree_model.json`` (needs ``self.HI_tree``) and
    ``_titer_subs_model.json`` (needs ``self.HI_subs``). For each model that
    is missing a message is printed instead.
    """
    from base.io_util import write_json
    prefix = os.path.join(self.config["output"]["auspice"],
                          self.info["prefix"])

    if not hasattr(self, 'HI_tree'):
        print('Tree model not yet trained')
    else:
        # raw titers first
        write_json(self.HI_tree.compile_titers(), prefix + '_titers.json')
        # then the tree model: potencies, avidities and per-clade dTiter
        potency = self.HI_tree.compile_potencies()
        avidity = self.HI_tree.compile_virus_effects()
        # only clades with a dTiter above 1e-6 are kept
        d_titer = dict((node.clade, node.dTiter)
                       for node in self.tree.tree.find_clades()
                       if node.dTiter > 1e-6)
        write_json({'potency': potency, 'avidity': avidity, 'dTiter': d_titer},
                   prefix + '_titer_tree_model.json')

    if not hasattr(self, 'HI_subs'):
        print('Substitution model not yet trained')
    else:
        # substitution model: potencies, avidities and substitution effects
        potency = self.HI_subs.compile_potencies()
        avidity = self.HI_subs.compile_virus_effects()
        substitution = self.HI_subs.compile_substitution_effects()
        write_json({'potency': potency, 'avidity': avidity,
                    'substitution': substitution},
                   prefix + '_titer_subs_model.json')
Пример #9
0
    def to_json(self, filename):
        """Write model parameters, data and accuracy statistics to `filename`.

        The JSON has three entries: ``params`` (one record per predictor with
        its parameter and global standard deviation), ``data`` (the records
        of ``self.pred_vs_true_df``) and ``accuracy`` (the clade error for
        the current parameters and the first element of the relative
        correlation returned by ``self.get_correlation()``).
        """
        # a data frame makes it easy to export the parameters as records
        parameter_table = pd.DataFrame({
            "predictor": self.predictors,
            "param": self.model_params.tolist(),
            "global_sd": self.global_sds.tolist()
        })

        # only the last of the three returned correlations is exported
        correlations = self.get_correlation()
        rho_rel = correlations[2]

        param_records = parameter_table.to_dict(orient="records")
        data_records = self.pred_vs_true_df.to_dict(orient="records")
        accuracy = {
            "clade_error": self.clade_fit(self.model_params),
            "rho_rel": rho_rel[0]
        }

        write_json(
            {"params": param_records, "data": data_records, "accuracy": accuracy},
            filename)
Пример #10
0
def titer_export(process, model_type='tree'):
    """Export the raw titers and the titer model of the given type.

    File names are appended directly to the prefix
    ``<auspice output dir>/<info prefix>``. ``titers.json`` is always written
    when `process` has a ``titer_model``. For ``model_type == 'tree'`` the
    cTiter/dTiter of every clade is copied into the node attributes, the tree
    is exported and ``tree_model.json`` is written; for
    ``model_type == 'substitution'`` ``substitution_model.json`` is written.
    Without a ``titer_model`` only a message is printed.
    """
    from base.io_util import write_json
    # the two imports below are not used by this function
    from itertools import chain
    import pandas as pd

    prefix = process.config["output"]["auspice"] + '/' + process.info["prefix"]

    if not hasattr(process, 'titer_model'):
        print('Tree model not yet trained')
        return

    # export the raw titers
    write_json(process.titer_model.compile_titers(),
               prefix + 'titers.json',
               indent=1)

    if model_type == 'tree':
        # make the titer values available as regular node attributes
        for node in process.tree.tree.find_clades():
            node.attr['cTiter'] = node.cTiter
            node.attr['dTiter'] = node.dTiter

        exported_attr = process.config["auspice"]["extra_attr"] + [
            "muts", "aa_muts", "attr", "clade", "cTiter", "dTiter"
        ]
        # the sequences json is not written here
        process.tree.export(path=prefix,
                            extra_attr=exported_attr,
                            indent=1,
                            write_seqs_json=False)

        potency = process.titer_model.compile_potencies()
        avidity = process.titer_model.compile_virus_effects()
        # only clades with a dTiter above 1e-6 are kept
        d_titer = dict((node.clade, node.dTiter)
                       for node in process.tree.tree.find_clades()
                       if node.dTiter > 1e-6)
        write_json({'potency': potency, 'avidity': avidity, 'dTiter': d_titer},
                   prefix + 'tree_model.json')

    elif model_type == 'substitution':
        potency = process.titer_model.compile_potencies()
        avidity = process.titer_model.compile_virus_effects()
        mutations = process.titer_model.compile_substitution_effects()
        write_json({'potency': potency, 'avidity': avidity,
                    'mutations': mutations},
                   prefix + 'substitution_model.json')
Пример #11
0
    def HI_export(self, prefix):
        '''
        Export the raw titers and the trained titer models as json files.

        File names are appended directly to `prefix`: titers.json and
        titer_tree_model.json are written when self.HI_tree exists,
        titer_subs_model.json when self.HI_subs exists. A message is printed
        for each model that has not been trained.
        '''
        if hasattr(self, 'HI_tree'):
            # export the raw titers
            hi_data = self.HI_tree.compile_titers()
            write_json(hi_data, prefix+'titers.json')
            # export the tree model (avidities and potencies only)
            # NOTE(review): the key is 'avidities' here but 'avidity' in the
            # substitution model below - confirm what consumers expect
            tree_model = {'potency':self.HI_tree.compile_potencies(),
                          'avidities':self.HI_tree.compile_virus_effects()}
            write_json(tree_model, prefix+'titer_tree_model.json')
        else:
            print('Tree model not yet trained')

        # fixed: this branch used to test for 'HI_tree' although it only
        # reads self.HI_subs, so it failed with AttributeError when just the
        # tree model existed and skipped the export when just HI_subs existed
        if hasattr(self, 'HI_subs'):
            # export the substitution model
            subs_model = {'potency':self.HI_subs.compile_potencies(),
                          'avidity':self.HI_subs.compile_virus_effects(),
                          'substitution':self.HI_subs.compile_substitution_effects()}
            write_json(subs_model, prefix+'titer_subs_model.json')
        else:
            print('Substitution model not yet trained')
Пример #12
0
def HI_export(process):
    """Export the titers and the trained titer models of `process` as JSON.

    Files are named ``<auspice output dir>/<info prefix>`` followed by
    ``_titers.json`` and ``_titer_tree_model.json`` (both need
    ``process.HI_tree``) and ``_titer_subs_model.json`` (needs
    ``process.HI_subs``). A message is printed for every missing model.
    """
    prefix = os.path.join(process.config["output"]["auspice"], process.info["prefix"])

    if not hasattr(process, 'HI_tree'):
        print('Tree model not yet trained')
    else:
        tree_model = process.HI_tree
        # raw titers
        write_json(tree_model.compile_titers(), prefix+'_titers.json')
        # tree model: potencies, avidities and dTiter of clades above 1e-6
        potency = tree_model.compile_potencies()
        avidity = tree_model.compile_virus_effects()
        d_titer = dict((node.clade, node.dTiter)
                       for node in process.tree.tree.find_clades()
                       if node.dTiter>1e-6)
        write_json({'potency': potency, 'avidity': avidity, 'dTiter': d_titer},
                   prefix+'_titer_tree_model.json')

    if not hasattr(process, 'HI_subs'):
        print('Substitution model not yet trained')
    else:
        subs_model = process.HI_subs
        # substitution model: potencies, avidities and substitution effects
        potency = subs_model.compile_potencies()
        avidity = subs_model.compile_virus_effects()
        substitution = subs_model.compile_substitution_effects()
        write_json({'potency': potency, 'avidity': avidity, 'substitution': substitution},
                   prefix+'_titer_subs_model.json')
Пример #13
0
    def titer_export(self):
        '''
        Export the raw titers and the trained titer models as json files.

        File names are appended directly to self.build_data_path:
        titers.json and tree_model.json are written when self.titer_tree
        exists, titer_subs_model.json when self.titer_subs exists. A message
        is printed for each model that has not been trained.
        '''
        from base.io_util import write_json
        prefix = self.build_data_path
        if hasattr(self, 'titer_tree'):
            # export the raw titers
            data = self.titer_tree.compile_titers()
            write_json(data, prefix+'titers.json', indent=1)
            # export the tree model: potencies, avidities and the dTiter of
            # every clade whose dTiter exceeds 1e-6
            tree_model = {'potency':self.titer_tree.compile_potencies(),
                          'avidity':self.titer_tree.compile_virus_effects(),
                          'dTiter':{n.clade:n.dTiter for n in self.tree.tree.find_clades() if n.dTiter>1e-6}}
            write_json(tree_model, prefix+'tree_model.json')
        else:
            print('Tree model not yet trained')

        # fixed: this branch used to test for 'titer_tree' although it only
        # reads self.titer_subs, so it failed with AttributeError when just
        # the tree model existed and skipped the export in the opposite case
        if hasattr(self, 'titer_subs'):
            # export the substitution model
            titer_subs_model = {'potency':self.titer_subs.compile_potencies(),
                          'avidity':self.titer_subs.compile_virus_effects(),
                          'substitution':self.titer_subs.compile_substitution_effects()}
            write_json(titer_subs_model, prefix+'titer_subs_model.json')
        else:
            print('Substitution model not yet trained')
Пример #14
0
                    "legendTitle": "Receptor binding mutations",
                    "key": "rb"
                }

        # titers
        if hasattr(runner, "titers") and runner.info["segment"] == "ha":
            HI_model(runner)

            if runner.config["auspice"]["titers_export"]:
                HI_export(runner)
                vaccine_distance_json = vaccine_distance(
                    titer_tree=runner.tree.tree,
                    vaccine_strains=vaccine_choices[runner.info['lineage']],
                    attributes=['dTiter', 'dTiterSub'])
                write_json(
                    vaccine_distance_json,
                    os.path.join(runner.config["output"]["auspice"],
                                 runner.info["prefix"]) + '_vaccine_dist.json')

                if runner.info["segment"] == 'ha':
                    plot_titers(runner.HI_subs,
                                runner.HI_subs.titers.titers,
                                fname='processed/%s_raw_titers.png' %
                                runner.info["prefix"],
                                title=runner.info["prefix"],
                                mean='geometric')
                    plot_titers(runner.HI_subs,
                                runner.HI_subs.titers.titers_normalized,
                                fname='processed/%s_normalized_titers.png' %
                                runner.info["prefix"],
                                title=runner.info["prefix"],
                                mean='arithmetric')
Пример #15
0
    def export(self,
               extra_attr=[],
               controls={},
               geo_attributes=[],
               color_options={
                   "num_date": {
                       "key": "num_date",
                       "legendTitle": "Sampling date",
                       "menuItem": "date",
                       "type": "continuous"
                   }
               },
               panels=['tree', 'entropy'],
               indent=None):
        '''
        export the tree, sequences, frequencies and metadata to json files
        for visualization in the browser

        All file names are appended directly to self.build_data_path.

        extra_attr     -- node attributes exported in addition to
                          "muts", "aa_muts", "attr" and "clade"
        controls       -- passed to self.make_control_json when not empty
        geo_attributes -- passed to self.make_geo_lookup_json when not empty
        color_options  -- stored as is in the metadata
        panels         -- stored as is in the metadata
        indent         -- indentation used for the tree and frequency json

        The default arguments are only read, never modified.
        '''
        prefix = self.build_data_path
        # export json file that contains alignment diversity column by column
        self.seqs.export_diversity(prefix + 'entropy.json')
        # exports the tree and the sequences inferred for all clades in the tree
        if hasattr(self, 'tree') and self.tree is not None:
            self.tree.export(path=prefix,
                             extra_attr=extra_attr +
                             ["muts", "aa_muts", "attr", "clade"],
                             indent=indent)

        # local function to round frequency estimates to useful precision
        # (reduces file size)
        def process_freqs(freq):
            return [round(x, 4) for x in freq]

        has_mutation_freqs = hasattr(self, 'mutation_frequencies')
        has_tree_freqs = hasattr(self, 'tree_frequencies')

        # construct a json file containing all frequency estimate
        # the format is region_protein:159F for mutations and region_clade:123 for clades
        if has_mutation_freqs or has_tree_freqs:
            if hasattr(self, 'pivots'):
                freq_json = {'pivots': process_freqs(self.pivots)}
                if has_mutation_freqs:
                    freq_json['counts'] = {
                        x: list(counts)
                        for x, counts in
                        self.mutation_frequency_counts.iteritems()
                    }
                    for (region, gene), tmp_freqs in \
                            self.mutation_frequencies.iteritems():
                        for mut, freq in tmp_freqs.iteritems():
                            # positions are stored zero-based, labels are one-based
                            label_str = (region + "_" + gene + ':' +
                                         str(mut[0] + 1) + mut[1])
                            freq_json[label_str] = process_freqs(freq)
                if has_tree_freqs:
                    # repeat for clade frequencies in trees
                    for region in self.tree_frequencies:
                        for clade, freq in \
                                self.tree_frequencies[region].iteritems():
                            label_str = region + '_clade:' + str(clade)
                            freq_json[label_str] = process_freqs(freq)
                    # repeat for named clades
                    if hasattr(self, 'clades_to_nodes'):
                        for region in self.tree_frequencies:
                            for clade, node in \
                                    self.clades_to_nodes.iteritems():
                                label_str = region + '_' + str(clade)
                                freq_json[label_str] = process_freqs(
                                    self.tree_frequencies[region][node.clade])
                # write to one frequency json
                write_json(freq_json,
                           prefix + 'frequencies.json',
                           indent=indent)
            else:
                # fixed: without pivots freq_json was never created, so the
                # code above failed with a NameError/UnboundLocalError; skip
                # the frequency export and continue with the metadata
                print("Cannot export frequencies - pivots do not exist")

        # write out metadata json
        print("Writing out metadata")
        meta_json = {}
        meta_json["color_options"] = color_options
        meta_json["panels"] = panels
        # the X marker makes it possible to drop a leading zero of the day
        meta_json["updated"] = time.strftime("X%d %b %Y").replace(
            'X0', 'X').replace('X', '')
        # record the current commit; "unknown" if pygit2 is not installed
        try:
            from pygit2 import Repository, discover_repository
            current_working_directory = os.getcwd()
            repository_path = discover_repository(current_working_directory)
            repo = Repository(repository_path)
            commit_id = repo[repo.head.target].id
            meta_json["commit"] = str(commit_id)
        except ImportError:
            meta_json["commit"] = "unknown"
        if len(controls):
            meta_json["controls"] = self.make_control_json(controls)
        if len(geo_attributes):
            meta_json["geo"] = self.make_geo_lookup_json(geo_attributes)
        write_json(meta_json, prefix + 'meta.json')
Пример #16
0
def export_metadata_json(process, prefix, indent):
    """Assemble the metadata of `process` and write ``<prefix>_meta.json``.

    The metadata combines the number of tree tips, the author summary and
    the sequence-to-author map, the color options from the auspice config
    (extended by color maps from ``process.colors``), several pass-through
    config/info entries, the date of the export, the controls json (when
    controls are configured) and ``process.lat_longs``.

    `indent` is part of the signature but is not used by this function.
    """
    process.log.notify("Writing out metaprocess")

    tree = process.tree.tree
    meta_json = {
        # number of tip nodes in the tree
        "virus_count": sum(1 for _ in tree.get_terminals()),
    }

    publication_summary = summarise_publications_from_tree(process.tree.tree)
    (author_info, seq_to_author) = publication_summary
    meta_json["author_info"] = author_info
    meta_json["seq_author_map"] = seq_to_author

    auspice_cfg = process.config["auspice"]

    # merge color maps from the input JSONs into the configured color options
    # (the options dictionary of the config is updated in place)
    color_options = auspice_cfg["color_options"]
    if process.colors:
        for trait, color_map in process.colors.iteritems():
            if trait not in color_options:
                process.log.warn(
                    "{} in colors (input JSON) but not auspice/color_options. Ignoring"
                    .format(trait))
                continue
            color_options[trait]["color_map"] = color_map
    meta_json["color_options"] = color_options

    # optional config entries are copied only when present
    for optional_key in ("date_range", "analysisSlider"):
        if optional_key in auspice_cfg:
            meta_json[optional_key] = auspice_cfg[optional_key]

    meta_json["panels"] = auspice_cfg["panels"]
    # the X marker makes it possible to drop a leading zero of the day
    stamp = time.strftime("X%d %b %Y")
    meta_json["updated"] = stamp.replace('X0', 'X').replace('X', '')

    # currently disabled entries:
    # meta_json["title"] = process.info["title"]
    # meta_json["maintainer"] = process.info["maintainer"]
    # meta_json["filters"] = process.info["auspice_filters"]
    # meta_json["annotations"] = extract_annotations(process)

    # pass through flu specific information (if present)
    if "vaccine_choices" in process.info:
        meta_json["vaccine_choices"] = process.info["vaccine_choices"]

    ## frequency params are ignored for now until they are implemented in nextstrain/auspice

    if "defaults" in auspice_cfg:
        meta_json["defaults"] = auspice_cfg["defaults"]

    # recording the commit is currently disabled:
    # try:
    #     from pygit2 import Repository, discover_repository
    #     current_working_directory = os.getcwd()
    #     repository_path = discover_repository(current_working_directory)
    #     repo = Repository(repository_path)
    #     # commit_id = repo[repo.head.target].id
    #     # meta_json["commit"] = str(commit_id)
    # except ImportError:
    #     meta_json["commit"] = "unknown"

    controls = process.config["auspice"]["controls"]
    if len(controls) > 0:
        meta_json["controls"] = process.make_control_json(
            process.config["auspice"]["controls"])

    meta_json["geo"] = process.lat_longs
    write_json(meta_json, prefix + '_meta.json')
Пример #17
0
    def auspice_export(self):
        '''
        export the tree, sequences, frequencies and metadata to json files
        for auspice visualization

        All files are named <auspice output dir>/<info prefix> followed by
        _entropy.json, _frequencies.json and _meta.json (plus whatever
        self.tree.export writes). The frequency json is only written when
        mutation and/or tree frequencies exist; if they exist without pivots
        a warning is logged and the frequency export is skipped.
        '''
        prefix = os.path.join(self.config["output"]["auspice"],
                              self.info["prefix"])
        indent = 2
        # export json file that contains alignment diversity column by column
        self.seqs.export_diversity(fname=prefix + '_entropy.json',
                                   indent=indent)
        # exports the tree and the sequences inferred for all clades in the tree
        if hasattr(self, 'tree') and self.tree is not None:
            self.tree.export(path=prefix,
                             extra_attr=self.config["auspice"]["extra_attr"] +
                             ["muts", "aa_muts", "attr", "clade"],
                             indent=indent)

        # local function to round frequency estimates to useful precision
        # (reduces file size)
        def process_freqs(freq):
            return [round(x, 4) for x in freq]

        has_mutation_freqs = hasattr(self, 'mutation_frequencies')
        has_tree_freqs = hasattr(self, 'tree_frequencies')

        # construct a json file containing all frequency estimate
        # the format is region_protein:159F for mutations and region_clade:123 for clades
        if has_mutation_freqs or has_tree_freqs:
            if hasattr(self, 'pivots'):
                freq_json = {'pivots': process_freqs(self.pivots)}
                if has_mutation_freqs:
                    freq_json['counts'] = {
                        x: list(counts)
                        for x, counts in
                        self.mutation_frequency_counts.iteritems()
                    }
                    for (region, gene), tmp_freqs in \
                            self.mutation_frequencies.iteritems():
                        for mut, freq in tmp_freqs.iteritems():
                            # positions are stored zero-based, labels are one-based
                            label_str = (region + "_" + gene + ':' +
                                         str(mut[0] + 1) + mut[1])
                            freq_json[label_str] = process_freqs(freq)
                if has_tree_freqs:
                    # repeat for clade frequencies in trees
                    for region in self.tree_frequencies:
                        for clade, freq in \
                                self.tree_frequencies[region].iteritems():
                            label_str = region + '_clade:' + str(clade)
                            freq_json[label_str] = process_freqs(freq)
                    # repeat for named clades
                    if hasattr(self, 'clades_to_nodes'):
                        for region in self.tree_frequencies:
                            for clade, node in \
                                    self.clades_to_nodes.iteritems():
                                label_str = region + '_' + str(clade)
                                freq_json[label_str] = process_freqs(
                                    self.tree_frequencies[region][node.clade])
                # write to one frequency json
                write_json(freq_json,
                           prefix + '_frequencies.json',
                           indent=indent)
            else:
                # fixed: without pivots freq_json was never created, so the
                # code above failed with a NameError/UnboundLocalError; skip
                # the frequency export and continue with the metadata
                self.log.warn(
                    "Cannot export frequencies - pivots do not exist")

        # count number of tip nodes
        virus_count = sum(1 for _ in self.tree.tree.get_terminals())

        # write out metadata json
        print("Writing out metadata")
        meta_json = {}

        # join up config color options with those in the input JSONs.
        col_opts = self.config["auspice"]["color_options"]
        if self.colors:
            for trait, data in self.colors.iteritems():
                if trait in col_opts:
                    col_opts[trait]["color_map"] = data
                else:
                    self.log.warn(
                        "{} in colors (input JSON) but not auspice/color_options. Ignoring"
                        .format(trait))

        meta_json["color_options"] = col_opts
        if "date_range" in self.config["auspice"]:
            meta_json["date_range"] = self.config["auspice"]["date_range"]
        if "analysisSlider" in self.config["auspice"]:
            meta_json["analysisSlider"] = self.config["auspice"][
                "analysisSlider"]
        meta_json["panels"] = self.config["auspice"]["panels"]
        # the X marker makes it possible to drop a leading zero of the day
        meta_json["updated"] = time.strftime("X%d %b %Y").replace(
            'X0', 'X').replace('X', '')
        meta_json["virus_count"] = virus_count
        if "defaults" in self.config["auspice"]:
            meta_json["defaults"] = self.config["auspice"]["defaults"]
        # TODO: move these to base/config
        # valid_defaults = {'colorBy': ['region', 'country', 'num_date', 'ep', 'ne', 'rb', 'genotype', 'cHI'],
        #                   'layout': ['radial', 'rectangular', 'unrooted'],
        #                   'geoResolution': ['region', 'country', 'division'],
        #                   'distanceMeasure': ['num_date', 'div']}
        # for param, value in defaults.items():
        #     try:
        #         assert param in valid_defaults and value in valid_defaults[param]
        #     except:
        #          print('ERROR: invalid default options provided. Try one of the following instead:', valid_defaults)
        #          print('Export will continue; default display options can be corrected in the meta.JSON file.')
        #          continue
        # meta_json["defaults"] = defaults

        # record the current commit; "unknown" if pygit2 is not installed
        try:
            from pygit2 import Repository, discover_repository
            current_working_directory = os.getcwd()
            repository_path = discover_repository(current_working_directory)
            repo = Repository(repository_path)
            commit_id = repo[repo.head.target].id
            meta_json["commit"] = str(commit_id)
        except ImportError:
            meta_json["commit"] = "unknown"
        if len(self.config["auspice"]["controls"]):
            meta_json["controls"] = self.make_control_json(
                self.config["auspice"]["controls"])
        meta_json["geo"] = self.lat_longs
        write_json(meta_json, prefix + '_meta.json')