def get_ham_treemap_from_row(row, tree, level=None):
    """Build pyham tree-profile treemap(s) for one ``(family, orthoxml)`` row.

    The orthoxml string is first passed through ``switch_name_ncbi_id`` and
    then loaded into a ``pyham.Ham`` object together with ``tree``.

    :param row: two-item iterable ``(fam, orthoxml)``; only the orthoxml
        string is used here.
    :param tree: species tree handed to ``pyham.Ham`` unchanged.
    :param level: optional name of an ancestral genome. When given, the first
        top-level HOG is cut at that level and one treemap is produced per
        resulting sub-HOG.
    :return: the treemap of the first top-level HOG when ``level`` is None,
        otherwise a list with one treemap per sub-HOG at ``level``. ``None``
        if pyham raises ``TypeError`` or ``AttributeError``.
    """
    # Unpacking still enforces that ``row`` has exactly two items.
    _, orthoxml = row
    orthoxml = switch_name_ncbi_id(orthoxml)
    try:
        # Both branches need the same Ham object, so it is built only once.
        ham_obj = pyham.Ham(tree, orthoxml, type_hog_file="orthoxml",
                            use_internal_name=True, orthoXML_as_string=True)
        if level is None:
            top_hog = ham_obj.get_list_top_level_hogs()[0]
            return ham_obj.create_tree_profile(hog=top_hog).treemap

        # Return subHOGs at level. Previously the profiles were computed and
        # then dropped, so this branch always yielded None.
        ancestral_genome = ham_obj.get_ancestral_genome_by_name(level)
        top_hog = ham_obj.get_list_top_level_hogs()[0]
        return [
            ham_obj.create_tree_profile(hog=sub_hog).treemap
            for sub_hog in top_hog.get_at_level(ancestral_genome)
        ]
    except TypeError as err:
        print('Type error:', err)
        return None
    except AttributeError as err:
        print('Attribute error:', err)
        return None
def _load_ogs(self):
    """
    Gather the amino-acid and DNA records of every OG in ``self.ogs`` and
    write them to ``01_ref_ogs_aa/<name>.fa`` and ``01_ref_ogs_dna/<name>.fa``
    below ``self.args.output_path``.

    The DNA source is chosen from ``self.args.dna_reference``: a path
    containing ``.fa``/``.fasta`` is indexed with ``SeqIO.index``, a path
    containing ``.h5`` is opened with ``db.Database``, anything else selects
    the REST api.

    :return: Dictionary with og name as key and the populated ``OG`` object as value
    """
    dna_reference = self.args.dna_reference
    if '.fa' in dna_reference or '.fasta' in dna_reference:
        print('--- Load ogs and find their corresponding DNA seq from {} ---'.format(dna_reference))
        print(
            'Loading {} into memory. This might take a while . . . '.format(dna_reference.split("/")[-1]))
        self._db = SeqIO.index(dna_reference, "fasta")
        self._db_source = 'fa'
    elif '.h5' in dna_reference:
        print('--- Load ogs and find their corresponding DNA seq from {} ---'.format(dna_reference))
        self._db = db.Database(dna_reference)
        self._db_id_map = db.OmaIdMapper(self._db)
        self._db_source = 'h5'
    else:
        print('--- Load ogs and find their corresponding DNA seq using the REST api ---')
        self._db_source = 'REST_api'

    # Compare by value: ``is`` against a string literal tests object identity,
    # which depends on interning and is a SyntaxWarning on Python 3.8+.
    if self.oma.mode == 'standalone':
        self._og_orthoxml = os.path.join(self.oma_output_path, 'OrthologousGroups.orthoxml')
        self._tree_str = os.path.join(self.oma_output_path, 'EstimatedSpeciesTree.nwk')
        self._ham_analysis = pyham.Ham(self._tree_str, self._og_orthoxml, use_internal_name=False)

    ogs = {}

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    orthologous_groups_aa = os.path.join(self.args.output_path, "01_ref_ogs_aa")
    os.makedirs(orthologous_groups_aa, exist_ok=True)

    orthologous_groups_dna = os.path.join(self.args.output_path, "01_ref_ogs_dna")
    os.makedirs(orthologous_groups_dna, exist_ok=True)

    names_og = self.ogs
    for name, records in tqdm(names_og.items(), desc='Loading OGs', unit=' OGs'):
        ogs[name] = OG()
        ogs[name].aa = self._get_aa_records(name, records)
        output_file_aa = os.path.join(orthologous_groups_aa, name + ".fa")
        output_file_dna = os.path.join(orthologous_groups_dna, name + ".fa")

        if self._db_source:
            ogs[name].dna = self._get_dna_records(ogs[name].aa, name)
        else:
            # NOTE(review): every branch above sets _db_source to a non-empty
            # string, so this path looks unreachable -- confirm before removing.
            print("DNA reference was not provided. Only amino acid sequences gathered!")
        self._write(output_file_dna, ogs[name].dna)
        self._write(output_file_aa, ogs[name].aa)

    return ogs
def orthoxml_parsing(orthoxml, nwk, species_list, gene_hog_dict):
    """Record, for every gene of the listed species, the id of its top-level HOG.

    A ``pyham.Ham`` object is built from ``nwk`` and ``orthoxml``; for each
    species name its extant genome is looked up and each of its genes is
    visited.

    :param orthoxml: orthoxml input handed to ``pyham.Ham``.
    :param nwk: tree input handed to ``pyham.Ham``.
    :param species_list: iterable of extant genome names to process.
    :param gene_hog_dict: dictionary updated IN PLACE. Keys are the first
        space-separated token of each gene's ``protId`` cross-reference;
        values are the top-level HOG's ``hog_id``, or the string
        ``"singleton"`` when that HOG reports ``is_singleton()``.
    :return: None; the result is delivered through ``gene_hog_dict``.
    """
    ham_analysis = pyham.Ham(nwk, orthoxml)
    for species in species_list:
        extant_genome = ham_analysis.get_extant_genome_by_name(species)
        for gene in extant_genome.genes:
            xref = gene.get_dict_xref()['protId'].split(" ")[0]
            # Look the top-level HOG up once and reuse it for both the
            # singleton test and the id, instead of querying it twice.
            top_hog = gene.get_top_level_hog()
            gene_hog_dict[xref] = (
                "singleton" if top_hog.is_singleton() else top_hog.hog_id
            )
# This is the HAM package import pyham # OPTIONAL: only if you want to have the logger information printed import logging logging.basicConfig( level=logging.INFO, format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s") # Initialise pyHam with a gene oma id query = 'HUMAN12' pyham_analysis = pyham.Ham(query_database=query, use_data_from='oma') hog = pyham_analysis.get_hog_by_id('HOG:0359282') # create the iHam for it and store it into an html file output_name = "iHam{}.html".format(hog.hog_id) pyham_analysis.create_iHam(hog=hog, outfile=output_name) # create the iHam for it and store it into an html file output_name = "TreeProfile{}.html".format(hog.hog_id) pyham_analysis.create_tree_profile(hog=hog, outfile=output_name)
# This is the HAM package import pyham # OPTIONAL: only if you want to have the logger information printed import logging logging.basicConfig( level=logging.INFO, format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s") # Initialise pyHam with a phyloxml tree and orthoXML HOGs phyloxml_path = "simpleEx.phyloxml" orthoxml_path3 = "simpleEx.orthoxml" pyham_analysis = pyham.Ham(phyloxml_path, orthoxml_path3, use_internal_name=True, tree_format='phyloxml') hog = pyham_analysis.get_hog_by_id('HOG:0355161') # create the iHam for it and store it into an html file output_name = "iHam{}.html".format(hog.hog_id) pyham_analysis.create_iHam(hog=hog, outfile=output_name)
second=second_ancestor, results_tag=results_tag), 'a') as output_txt: for xref in results_xref: output_txt.write("{xref}\n".format(xref=xref)) if __name__ == "__main__": # All genes in ancestor genomes according to pyHAM: digenea_ancestor_all, platy_ancestor_all, = [], [] # Filtered sets of genes from ancestor genomes: digenea_ancestor_filtered, platy_ancestor_filtered = [], [] # For vertical comparison: retained_gene_ids, duplicated_gene_ids, gained_gene_ids = [], [], [] # pyHAM analysis: ham_analysis = pyham.Ham(nwk_file, orthoxml_file, use_internal_name=True) # Ancestor genome reconstruction: digenea_ancestor_genome = ham_analysis.get_ancestral_genome_by_name( "Digenea") platy_ancestor_genome = ham_analysis.get_ancestral_genome_by_name( "Platyhelminthes") digenea_ancestor_all.extend(digenea_ancestor_genome.genes) print( "According to pyHAM, Digenea ancestor genome model includes {num} genes" .format(num=len(digenea_ancestor_all))) platy_ancestor_all.extend(platy_ancestor_genome.genes) print( "According to pyHAM, Platyhelminthes ancestor genome model includes {num} genes" .format(num=len(platy_ancestor_all)))