def build_database(connection):
    """Build the hpo database.

    Opens the gzipped disease-to-gene, phenotype-to-gene and gene files,
    parses each of them and populates the hpo, disease and gene tables
    through ``connection``. The input files are closed once every table has
    been populated, or as soon as any step raises.

    Args:
        connection (sqlite3.connect): A database connection object
    """
    utf8_reader = getreader('utf-8')
    # Every gzip handle opened below is tracked here so that it is closed
    # deterministically instead of being left to the garbage collector.
    open_handles = []
    try:
        logger.info("Fetching disease to gene file")
        open_handles.append(gzip.open(disease_to_genes_path))
        disease_to_genes = utf8_reader(open_handles[-1], errors='replace')

        logger.info("Fetching phenotypes file")
        open_handles.append(gzip.open(phenotypes_path))
        phenotype_to_genes = utf8_reader(open_handles[-1], errors='replace')

        logger.info("Fetching genes file")
        open_handles.append(gzip.open(genes_path))
        gene_lines = utf8_reader(open_handles[-1], errors='replace')

        logger.info("Parsing phenotypes to gene file")
        phenotypes = parse_phenotypes(phenotype_to_genes)

        logger.info("Parsing disease to gene file")
        diseases = parse_diseases(disease_to_genes)

        logger.info("Parsing genes file")
        parsed_genes = parse_genes(gene_lines)

        logger.info("Populating hpo table")
        populate_hpo(connection=connection, hpo_terms=phenotypes)
        logger.debug("Hpo table populated")

        logger.info("Populating disease table")
        populate_disease(connection=connection, disease_terms=diseases)
        logger.debug("Disease table populated")

        logger.info("Populating gene table")
        populate_genes(connection=connection, genes=parsed_genes)
        logger.debug("Gene table populated")
    finally:
        # The files are kept open until population is done in case the
        # parse_* helpers read their input lazily.
        for handle in reversed(open_handles):
            handle.close()
def genes(request):
    """Return the result of ``parse_genes`` for a small hard-coded gene set.

    The input is a header line followed by four tab-separated gene lines
    (PSMA2P3, PSMA2P1, TCOF1 and NSRP1), some with empty score columns.

    Args:
        request: Not used by this function (presumably the pytest fixture
            request object -- confirm against the test setup).
    """
    column_names = (
        "#chrom", "start", "stop", "ensembl_id", "description",
        "hgnc_symbol", "hi_score", "constraint_score",
    )
    # One tuple per gene, in the same column order as ``column_names``.
    # Empty strings stand for missing hi_score / constraint_score values.
    gene_rows = (
        ("2", "203044020", "203044694", "ENSG00000227890",
         "proteasome (prosome, macropain) subunit, alpha type, 2 pseudogene 3",
         "PSMA2P3", "", ""),
        ("11", "87040449", "87041094", "ENSG00000254582",
         "proteasome (prosome, macropain) subunit, alpha type, 2 pseudogene 1",
         "PSMA2P1", "", ""),
        ("5", "149737202", "149779871", "ENSG00000070814",
         "Treacher Collins-Franceschetti syndrome 1",
         "TCOF1", "0.202", "4.98430839027322"),
        ("17", "28442539", "28513493", "ENSG00000126653",
         "nuclear speckle splicing regulatory protein 1",
         "NSRP1", "", "2.9732792450519"),
    )

    gene_lines = ["\t".join(column_names)]
    gene_lines.extend("\t".join(fields) for fields in gene_rows)
    return parse_genes(gene_lines)
def build_database(connection):
    """Build the hpo database.

    Opens the gzipped disease-to-gene, phenotype-to-gene and gene files,
    parses each of them and populates the hpo, disease and gene tables
    through ``connection``. The input files are closed once every table has
    been populated, or as soon as any step raises.

    Args:
        connection (sqlite3.connect): A database connection object
    """
    utf8_reader = getreader('utf-8')
    # Every gzip handle opened below is tracked here so that it is closed
    # deterministically instead of being left to the garbage collector.
    open_handles = []
    try:
        logger.info("Fetching disease to gene file")
        open_handles.append(gzip.open(disease_to_genes_path))
        disease_to_genes = utf8_reader(open_handles[-1], errors='replace')

        logger.info("Fetching phenotypes file")
        open_handles.append(gzip.open(phenotypes_path))
        phenotype_to_genes = utf8_reader(open_handles[-1], errors='replace')

        logger.info("Fetching genes file")
        open_handles.append(gzip.open(genes_path))
        gene_lines = utf8_reader(open_handles[-1], errors='replace')

        logger.info("Parsing phenotypes to gene file")
        phenotypes = parse_phenotypes(phenotype_to_genes)

        logger.info("Parsing disease to gene file")
        diseases = parse_diseases(disease_to_genes)

        logger.info("Parsing genes file")
        parsed_genes = parse_genes(gene_lines)

        logger.info("Populating hpo table")
        populate_hpo(
            connection=connection,
            hpo_terms=phenotypes
        )
        logger.debug("Hpo table populated")

        logger.info("Populating disease table")
        populate_disease(
            connection=connection,
            disease_terms=diseases
        )
        logger.debug("Disease table populated")

        logger.info("Populating gene table")
        populate_genes(
            connection=connection,
            genes=parsed_genes
        )
        logger.debug("Gene table populated")
    finally:
        # The files are kept open until population is done in case the
        # parse_* helpers read their input lazily.
        for handle in reversed(open_handles):
            handle.close()