Exemplo n.º 1
0
def main(params):
    import time
    from io_util import read_json
    from io_util import write_json
    from tree_util import json_to_dendropy, dendropy_to_json

    print "--- Start fitness model optimization at " + time.strftime("%H:%M:%S") + " ---"

    tree_fname = "data/tree_refine.json"
    tree = json_to_dendropy(read_json(tree_fname))
    fm = fitness_model(tree, predictors=params["predictors"], verbose=1)
    fm.predict(niter=params["niter"])
    out_fname = "data/tree_fitness.json"
    write_json(dendropy_to_json(tree.seed_node), out_fname)
    return out_fname
Exemplo n.º 2
0
def main(in_fname='data/tree_refine.json', tree=True):

	print "--- Mutational tolerance at " + time.strftime("%H:%M:%S") + " ---"
	viruses = read_json(in_fname)
	if tree:
		viruses = json_to_dendropy(viruses)

	assign_fitness(viruses)

	if tree:
		out_fname = "data/tree_tolerance.json"
		write_json(dendropy_to_json(viruses.seed_node), out_fname)
	else:
		out_fname = "data/virus_tolerance.json"
		write_json(viruses, out_fname)
	return out_fname, viruses
Exemplo n.º 3
0
def main(params):
    import time
    from io_util import read_json
    from io_util import write_json
    from tree_util import json_to_dendropy, dendropy_to_json

    print "--- Start fitness model optimization at " + time.strftime(
        "%H:%M:%S") + " ---"

    tree_fname = 'data/tree_refine.json'
    tree = json_to_dendropy(read_json(tree_fname))
    fm = fitness_model(tree, predictors=params['predictors'], verbose=1)
    fm.predict(niter=params['niter'])
    out_fname = "data/tree_fitness.json"
    write_json(dendropy_to_json(tree.root), out_fname)
    return out_fname
Exemplo n.º 4
0
def main(in_fname='data/tree_refine.json', tree=True):

	print "--- Mutational tolerance at " + time.strftime("%H:%M:%S") + " ---"
	viruses = read_json(in_fname)
	if tree:
		viruses = json_to_dendropy(viruses)

	assign_fitness(viruses)

	if tree:
		out_fname = "data/tree_tolerance.json"		
		write_json(dendropy_to_json(viruses.seed_node), out_fname)
	else:
		out_fname = "data/virus_tolerance.json"
		write_json(viruses, out_fname)
	return out_fname, viruses
Exemplo n.º 5
0
def main(tree_fname = 'data/tree_refine.json'):

	print "--- Testing predictor evaluations ---"
	tree =  json_to_dendropy(read_json(tree_fname))

	print "Calculating epitope distances"
	calc_epitope_distance(tree)

	print "Calculating nonepitope distances"
	calc_nonepitope_distance(tree)

	print "Calculating LBI"
#	calc_LBI(tree)

	print "Writing decorated tree"
	out_fname = "data/tree_predictors.json"
	write_json(dendropy_to_json(tree.seed_node), out_fname)
	return out_fname
Exemplo n.º 6
0
    def export_to_auspice(self,
                          tree_fields=[],
                          tree_pop_list=[],
                          annotations=[],
                          seq='aa'):
        from tree_util import dendropy_to_json, all_descendants
        from io_util import write_json, read_json
        print "--- Streamline at " + time.strftime("%H:%M:%S") + " ---"
        # Move sequence data to separate file
        print "Writing sequences"
        elems = {}
        for node in self.tree:
            if hasattr(node, "clade") and hasattr(node, "seq"):
                elems[node.clade] = {}
                elems[node.clade]['nuc'] = {
                    pos: state
                    for pos, (state, ancstate) in enumerate(
                        izip(node.seq, self.tree.seed_node.seq))
                    if state != ancstate
                }
                for anno, aa_seq in node.aa_seq.iteritems():
                    elems[node.clade][anno] = {
                        pos: state
                        for pos, (state, ancstate) in enumerate(
                            izip(aa_seq, self.tree.seed_node.aa_seq[anno]))
                        if state != ancstate
                    }

        elems['root'] = {}
        elems['root']['nuc'] = self.tree.seed_node.seq
        for anno, aa_seq in self.tree.seed_node.aa_seq.iteritems():
            elems['root'][anno] = aa_seq
        write_json(elems, self.auspice_sequences_fname, indent=None)

        print "Writing tree"
        self.tree_json = dendropy_to_json(self.tree.seed_node, tree_fields)
        for node in all_descendants(self.tree_json):
            for attr in tree_pop_list:
                if attr in node:
                    node.pop(attr, None)
            if "freq" in node:
                for reg in node["freq"]:
                    try:
                        node["freq"][reg] = [
                            round(x, 3) for x in node["freq"][reg]
                        ]
                    except:
                        node["freq"][reg] = "undefined"

        if hasattr(self, "clade_designations"):
            # find basal node of clade and assign clade x and y values based on this basal node
            clade_present = {}
            clade_xval = {}
            clade_yval = {}
            if hasattr(self.tree.seed_node, "freq"):
                self.frequencies['clades'] = {
                    reg: {
                        "pivots": list(self.tree.seed_node.pivots)
                    }
                    for reg in self.tree.seed_node.freq
                }

            for clade, gt in self.clade_designations.iteritems():
                if clade in annotations:
                    print "Annotating clade", clade
                    tmp_nodes = sorted(
                        (node for node in self.tree.postorder_node_iter()
                         if not node.is_leaf() and all([
                             node.aa_seq[gene][pos - 1] == aa
                             for gene, pos, aa in gt
                         ])),
                        key=lambda node: node.xvalue)
                    if len(tmp_nodes):
                        clade_present[clade] = True
                        base_node = tmp_nodes[0]
                        clade_xval[clade] = base_node.xvalue
                        clade_yval[clade] = base_node.yvalue
                        if hasattr(base_node, 'freq'):
                            for region in base_node.freq:
                                try:
                                    self.frequencies["clades"][region][
                                        clade.lower()] = [
                                            round(x, 3)
                                            for x in base_node.freq[region]
                                        ]
                                    print "added frequencies", region, clade
                                except:
                                    print base_node.freq[region]
                    else:
                        clade_present[clade] = False
                        print "clade", clade, gt, "not in tree"
            # append clades, coordinates and genotype to meta
            self.tree_json["clade_annotations"] = [
                (clade, clade_xval[clade], clade_yval[clade],
                 "/".join([gene + ':' + str(pos) + aa
                           for gene, pos, aa in gt]))
                for clade, gt in self.clade_designations.iteritems()
                if clade in annotations and clade_present[clade] == True
            ]
        write_json(self.tree_json, self.auspice_tree_fname, indent=None)
        try:
            read_json(self.auspice_tree_fname)
        except:
            print "Read failed, rewriting with indents"
            write_json(self.tree_json, self.auspice_tree_fname, indent=1)

        # Write out frequencies
        if hasattr(self, 'frequencies'):
            if not hasattr(self, 'aa_entropy') and not hasattr(
                    self, 'nuc_entropy'):
                self.determine_variable_positions()

            if hasattr(self, 'aa_entropy'):
                self.frequencies["entropy"] = {}
                self.frequencies["location"] = {}
                for anno, alnS in self.aa_entropy.iteritems():
                    self.frequencies["location"][anno] = [int(self.cds[anno].location.start),\
                              int(self.cds[anno].location.start)]
                    self.frequencies["entropy"][anno] = [[
                        pos, S, muts
                    ] for pos, S, muts in izip(
                        xrange(alnS.shape[0]), alnS,
                        self.variable_aa_identities[anno])]
            elif seq == 'nuc' and hasattr(self, 'nuc_entropy'):
                self.frequencies["entropy"] = [
                    [pos, S, muts] for pos, S, muts in izip(
                        xrange(self.nuc_entropy.shape[0]), self.nuc_entropy,
                        self.variable_nuc_identities)
                ]

            write_json(self.frequencies, self.auspice_frequency_fname)

        # Write out metadata
        print "Writing out metadata"
        meta = {}
        meta["updated"] = time.strftime("X%d %b %Y").replace('X0',
                                                             'X').replace(
                                                                 'X', '')
        try:
            from pygit2 import Repository, discover_repository
            current_working_directory = os.getcwd()
            repository_path = discover_repository(current_working_directory)
            repo = Repository(repository_path)
            commit_id = repo[repo.head.target].id
            meta["commit"] = str(commit_id)
        except ImportError:
            meta["commit"] = "unknown"

        if hasattr(self, "date_region_count"):
            meta["regions"] = self.regions
            meta["virus_stats"] = [
                [str(y) + '-' + str(m)] +
                [self.date_region_count[(y, m)][reg] for reg in self.regions]
                for y, m in sorted(self.date_region_count.keys())
            ]
        write_json(meta, self.auspice_meta_fname, indent=None)
        self.export_accession_numbers()
Exemplo n.º 7
0
	def export_to_auspice(self, tree_fields = [], tree_pop_list = [], annotations = [], seq='aa'):
		from tree_util import dendropy_to_json, all_descendants
		from io_util import write_json, read_json
		print time.strftime("%H:%M:%S") + " ---"
		# Move sequence data to separate file
		print "Writing sequences"
		elems = {}
		for node in self.tree:
			if hasattr(node, "clade") and hasattr(node, "seq"):
				elems[node.clade] = {}
				elems[node.clade]['nuc'] = {pos:state for pos, (state, ancstate) in 
								enumerate(izip(node.seq, self.tree.seed_node.seq)) if state!=ancstate}
				for anno, aa_seq in node.aa_seq.iteritems():
					elems[node.clade][anno] = {pos:state for pos, (state, ancstate) in 
								enumerate(izip(aa_seq, self.tree.seed_node.aa_seq[anno])) if state!=ancstate}

		elems['root'] = {}
		elems['root']['nuc'] = self.tree.seed_node.seq
		for anno, aa_seq in self.tree.seed_node.aa_seq.iteritems():
			elems['root'][anno] = aa_seq
		write_json(elems, self.auspice_sequences_fname, indent=None)

		print "Writing tree"
		self.tree_json = dendropy_to_json(self.tree.seed_node, tree_fields)
		for node in all_descendants(self.tree_json):
			for attr in tree_pop_list:
				if attr in node:
					node.pop(attr, None)
			if "freq" in node:
				for reg in node["freq"]:
					try:
						node["freq"][reg] = [round(x,3) for x in node["freq"][reg]]
					except:
						node["freq"][reg] = "undefined"				

		if hasattr(self,"clade_designations"):
			# find basal node of clade and assign clade x and y values based on this basal node
			clade_present = {}
			clade_xval = {}
			clade_yval = {}
			self.frequencies['clades'] = {reg:{"pivots":list(self.tree.seed_node.pivots)} 
											for reg in self.tree.seed_node.freq}

			for clade, gt in self.clade_designations.iteritems():
				if clade in annotations:
					print "Annotating clade", clade
					tmp_nodes = sorted((node for node in self.tree.postorder_node_iter()
						if not node.is_leaf() and all([node.aa_seq[gene][pos-1]==aa for gene, pos, aa in gt])),
						key=lambda node: node.xvalue)
					if len(tmp_nodes):
						clade_present[clade] = True
						base_node = tmp_nodes[0]
						clade_xval[clade] = base_node.xvalue
						clade_yval[clade] = base_node.yvalue
						for region in base_node.freq:
							try:
								self.frequencies["clades"][region][clade.lower()] = [round(x,3) for x in base_node.freq[region]]
								print "added frequencies",region, clade
							except:
								print base_node.freq[region]
					else:
						clade_present[clade] = False
						print "clade",clade, gt, "not in tree"
			# append clades, coordinates and genotype to meta
			self.tree_json["clade_annotations"] = [(clade, clade_xval[clade],clade_yval[clade], 
								"/".join([gene+':'+str(pos)+aa for gene, pos, aa in gt]))
							for clade, gt in self.clade_designations.iteritems() 
							if clade in annotations and clade_present[clade] == True]
		write_json(self.tree_json, self.auspice_tree_fname, indent=None)
		try:
			read_json(self.auspice_tree_fname)
		except:
			print "Read failed, rewriting with indents"	
			write_json(self.tree_json, self.auspice_tree_fname, indent=1)
			
		# Include genotype frequencies
		if hasattr(self, 'frequencies'):
			if not hasattr(self, 'aa_entropy') and not hasattr(self, 'nuc_entropy'):
				self.determine_variable_positions()

			if hasattr(self, 'aa_entropy'):
				self.frequencies["entropy"] = {}
				self.frequencies["location"] = {}
				for anno, alnS in self.aa_entropy.iteritems():
					self.frequencies["location"][anno] = [int(self.cds[anno].location.start),\
															int(self.cds[anno].location.start)]
					self.frequencies["entropy"][anno] = [ [pos, S, muts] for pos,S,muts in 
						izip(xrange(alnS.shape[0]), alnS,self.variable_aa_identities[anno]) ]
			elif seq=='nuc' and hasattr(self, 'nuc_entropy'):
				self.frequencies["entropy"] = [ [pos, S, muts] for pos,S,muts in 
						izip(xrange(self.nuc_entropy.shape[0]), self.nuc_entropy,self.variable_nuc_identities) ]

			write_json(self.frequencies, self.auspice_frequency_fname)
			print("WRITEEN")
		# Write out metadata
		print "Writing out metadata"		
		meta = {}
		meta["updated"] = time.strftime("X%d %b %Y").replace('X0','X').replace('X','')
		try:
			from pygit2 import Repository, discover_repository
			current_working_directory = os.getcwd()
			repository_path = discover_repository(current_working_directory)
			repo = Repository(repository_path)
			commit_id = repo[repo.head.target].id
			meta["commit"] = str(commit_id)
		except ImportError:
			meta["commit"] = "unknown"
		
		if hasattr(self,"date_region_count"):
			meta["regions"] = self.regions
			meta["virus_stats"] = [ [str(y)+'-'+str(m)] + [self.date_region_count[(y,m)][reg] for reg in self.regions]
									for y,m in sorted(self.date_region_count.keys()) ]
		write_json(meta, self.auspice_meta_fname, indent=0)
Exemplo n.º 8
0
	def refine_tree(self):
		"""Run ``tree_refine.main`` on the tree and save the tree as JSON.

		The tree is serialised from its seed node and written to
		``self.intermediate_tree_fname``.
		"""
		import tree_refine
		tree_refine.main(
			self.tree,
			self.viruses,
			config['outgroup'],
			config['cds'],
		)
		refined_json = dendropy_to_json(self.tree.seed_node)
		write_json(refined_json, self.intermediate_tree_fname)
Exemplo n.º 9
0
	def export_to_auspice(self):
		"""Pass the JSON form of the tree and the frequencies to ``streamline.main``."""
		import streamline
		# Serialise the tree starting from its seed node.
		auspice_tree = dendropy_to_json(self.tree.seed_node)
		streamline.main(
			auspice_tree,
			self.frequencies,
		)