def x12_microarray_disease(file, truncate):
    """Import the disease columns of a microarray expression file.

    ``file`` is read as a tab-separated table without a header row, using a
    fixed 30-column layout.  Only the two identifier columns and the
    seventeen disease columns are retained; the frame is then passed through
    ``insert_uuid4`` (defined elsewhere) and copied into
    ``public.expression_microarray_disease``.

    Args:
        file: Path or buffer handed to ``pandas.read_csv``.
        truncate: When truthy, the target table is truncated before the
            copy, inside the same ``engine.begin()`` block.
    """
    id_columns = ['gene', 'ensembl_id']
    # Present in the input file but not stored in the disease table.
    skipped_columns = [
        'retina',
        'corneal',
        'corneal_epithelium',
        'corneal_endothelium',
        'conjunctiva',
        'optic_nerve_head',
        'lymphoblast',
        'eye_orbit',
        'lacrimal_gland',
        'thyroid',
        'normal_uveal_melanocytes',
    ]
    disease_columns = [
        'retinal_detachment',
        'diabetic_retinopathy',
        'retinoblastoma',
        'retinitis_pigmentosa',
        'keratoconus',
        'keratitis',
        'trachoma',
        'glaucoma',
        'fuchs_endothelial_corneal_dystrophy',
        'uveal_melanoma',
        'uveal_melanoma_mum2b',
        'uveal_melanoma_ocm1a',
        'graves_ophthalmopathy',
        'nonspecific_orbital_inflammation',
        'sarcoidosis',
        'granulomatosis_with_polyangiitis',
        'thyroid_eye_disease',
    ]

    frame = pd.read_csv(
        file,
        delimiter='\t',
        header=None,
        names=id_columns + skipped_columns + disease_columns,
    )
    frame = insert_uuid4(frame[id_columns + disease_columns])

    with db.engine.begin() as connection:
        if truncate:
            connection.execute('''truncate public.expression_microarray_disease''')
        save_dataframe_using_copy(
            connection, frame, 'public', 'expression_microarray_disease')
def x05_gnomad(file, truncate):
    """Import a gnomAD file into ``public.gnomad``.

    The file is parsed by ``read_gnomad`` and the result is passed through
    ``insert_uuid4`` (both defined elsewhere) before being copied into the
    table.

    Args:
        file: Input handed unchanged to ``read_gnomad``.
        truncate: When truthy, ``public.gnomad`` is truncated before the
            copy, inside the same ``engine.begin()`` block.
    """
    records = insert_uuid4(read_gnomad(file))

    with db.engine.begin() as connection:
        if truncate:
            connection.execute('''truncate public.gnomad''')
        save_dataframe_using_copy(connection, records, 'public', 'gnomad')
def x04_causality(file, truncate):
    """Import a causality file into ``public.causality``.

    ``file`` is read as a tab-separated table without a header row whose
    four columns are named ``variant``, ``p_value``, ``beta`` and
    ``disease``.  The frame is passed through ``insert_uuid4`` (defined
    elsewhere) and then copied into the table.

    Args:
        file: Path or buffer handed to ``pandas.read_csv``.
        truncate: When truthy, the table is truncated before the copy,
            inside the same ``engine.begin()`` block.
    """
    columns = ['variant', 'p_value', 'beta', 'disease']
    frame = insert_uuid4(
        pd.read_csv(file, delimiter='\t', header=None, names=columns))

    with db.engine.begin() as connection:
        if truncate:
            connection.execute('''truncate public.causality''')
        save_dataframe_using_copy(connection, frame, 'public', 'causality')
def x18_network_disease(file, truncate):
    """Import a disease gene-network file into ``public.gene_network_disease``.

    ``file`` is read as a tab-separated table without a header row whose
    columns are named ``gene``, ``weight``, ``disease`` and ``dataset``.
    The frame is passed through ``insert_uuid4`` (defined elsewhere) and
    then copied into the table.

    Args:
        file: Path or buffer handed to ``pandas.read_csv``.
        truncate: When truthy, the table is truncated before the copy,
            inside the same ``engine.begin()`` block.
    """
    columns = ['gene', 'weight', 'disease', 'dataset']
    network = pd.read_csv(file, delimiter='\t', header=None, names=columns)
    network = insert_uuid4(network)

    with db.engine.begin() as connection:
        if truncate:
            connection.execute('''truncate public.gene_network_disease''')
        save_dataframe_using_copy(
            connection, network, 'public', 'gene_network_disease')
def x21_gene_significance(file, truncate):
    """Import per-disease gene significance into ``public.gene_disease_significance``.

    ``file`` is read as a tab-separated table without a header row whose
    columns are named ``gene``, ``amd``, ``dr``, ``kc``, ``glc``, ``rp`` and
    ``rb``.  The frame is passed through ``insert_uuid4`` (defined
    elsewhere) and then copied into the table.

    Args:
        file: Path or buffer handed to ``pandas.read_csv``.
        truncate: When truthy, the target table is truncated before the
            copy, inside the same ``engine.begin()`` block.
    """
    columns = ['gene', 'amd', 'dr', 'kc', 'glc', 'rp', 'rb']
    gene = pd.read_csv(file, delimiter='\t', header=None, names=columns)
    gene = insert_uuid4(gene)

    engine = db.engine
    with engine.begin() as conn:
        if truncate:
            # Truncate the table that is actually loaded below.  This used to
            # truncate public.disease_go, which wiped an unrelated table and
            # left stale rows in gene_disease_significance.
            conn.execute('''truncate public.gene_disease_significance''')
        save_dataframe_using_copy(
            conn, gene, 'public', 'gene_disease_significance')
def x25_epigenetic_alteration(file, truncate):
    """Import an epigenetic alteration file into ``public.epigenetic_alteration``.

    ``file`` is read as a tab-separated table without a header row whose
    columns are named ``gene``, ``normal_retina``, ``amd_retina``,
    ``normal_rpe`` and ``amd_rpe``.  The frame is passed through
    ``insert_uuid4`` (defined elsewhere) and then copied into the table.

    Args:
        file: Path or buffer handed to ``pandas.read_csv``.
        truncate: When truthy, the table is truncated before the copy,
            inside the same ``engine.begin()`` block.
    """
    columns = [
        'gene',
        'normal_retina',
        'amd_retina',
        'normal_rpe',
        'amd_rpe',
    ]
    alterations = insert_uuid4(
        pd.read_csv(file, delimiter='\t', header=None, names=columns))

    with db.engine.begin() as connection:
        if truncate:
            connection.execute('''truncate public.epigenetic_alteration''')
        save_dataframe_using_copy(
            connection, alterations, 'public', 'epigenetic_alteration')
def x17_single_out(file, truncate):
    """Import a single-cell "out" file into ``public.out_cell``.

    ``file`` is read as a tab-separated table without a header row whose
    columns are named ``name``, ``xaxis``, ``yaxis``, ``cluster`` and
    ``labels``.  The frame is passed through ``insert_uuid4`` and
    ``insert_file_out`` (both defined elsewhere; the latter also receives
    ``file``) and then copied into the table.

    Args:
        file: Path handed to ``pandas.read_csv`` and ``insert_file_out``.
        truncate: When truthy, the target table is truncated before the
            copy, inside the same ``engine.begin()`` block.
    """
    columns = ['name', 'xaxis', 'yaxis', 'cluster', 'labels']
    out = pd.read_csv(file, delimiter='\t', header=None, names=columns)
    out = insert_uuid4(out)
    out = insert_file_out(out, file)

    engine = db.engine
    with engine.begin() as conn:
        if truncate:
            # Truncate the table that is actually loaded below.  This used to
            # truncate public.markers_cell, destroying the marker data while
            # leaving old rows in out_cell.
            conn.execute('''truncate public.out_cell''')
        save_dataframe_using_copy(conn, out, 'public', 'out_cell')
def x07_omim(file, truncate):
    """Import an OMIM file into ``public.genetic_omim``.

    ``file`` is read as a tab-separated table without a header row using a
    fixed ten-column layout.  The frame is passed through ``insert_uuid4``
    (defined elsewhere) and then copied into the table.

    Args:
        file: Path or buffer handed to ``pandas.read_csv``.
        truncate: When truthy, the table is truncated before the copy,
            inside the same ``engine.begin()`` block.
    """
    columns = [
        'gene',
        'gene_name',
        'variant',
        'band',
        'omim',
        'ensembl',
        'disease',
        'confidence',
        'phenotypes_in_omim',
        'pubmed',
    ]
    records = pd.read_csv(file, delimiter='\t', header=None, names=columns)
    records = insert_uuid4(records)

    with db.engine.begin() as connection:
        if truncate:
            connection.execute('''truncate public.genetic_omim''')
        save_dataframe_using_copy(
            connection, records, 'public', 'genetic_omim')
def x22_gene_interaction(file, database, truncate):
    """Import a gene interaction file into ``public.<database>`` in chunks.

    ``file`` is read as a tab-separated table without a header row whose
    columns are named ``gene``, ``contrast_gene`` and ``weight``.  It is
    read in chunks of 5,000,000 rows and every chunk is copied in its own
    ``engine.begin()`` block.

    Args:
        file: Path or buffer handed to ``pandas.read_csv``.
        database: Name of the target table in the ``public`` schema.  It is
            interpolated directly into the TRUNCATE statement, so it must be
            a trusted identifier.
        truncate: When truthy, the target table is truncated once, together
            with the first chunk.
    """
    # NOTE(review): ``error_bad_lines`` was deprecated in pandas 1.3 and
    # removed in 2.0 in favour of ``on_bad_lines='skip'``.  It is kept as-is
    # to match the pandas version this project runs on -- confirm.
    chunks = pd.read_csv(
        file,
        delimiter='\t',
        error_bad_lines=False,
        chunksize=5000000,
        header=None,
        names=['gene', 'contrast_gene', 'weight'],
    )

    engine = db.engine
    for index, chunk in enumerate(chunks):
        with engine.begin() as conn:
            # Truncate only ahead of the first chunk.  Truncating on every
            # iteration discarded all previously copied chunks, leaving only
            # the last one in the table.
            if truncate and index == 0:
                conn.execute(f'''truncate public.{database}''')
            save_dataframe_using_copy(conn, chunk, 'public', database)
def x02_gene(file, truncate):
    """Import a gene annotation file into ``public.gene``.

    ``file`` is read as a tab-separated table without a header row using a
    fixed seventeen-column layout.  The frame is passed through
    ``insert_uuid4`` (defined elsewhere) and then copied into the table.

    Args:
        file: Path or buffer handed to ``pandas.read_csv``.
        truncate: When truthy, the table is truncated before the copy,
            inside the same ``engine.begin()`` block.
    """
    columns = [
        'symbol',
        'name',
        'synonyms',
        'gene_type',
        'location',
        'strand',
        'description',
        'omim',
        'ensembl',
        'clinvar',
        'decipher',
        'gnomad',
        'panelapp',
        'eye_disease',
        'phenotypes',
        'drugbank_id',
        'drug_target',
    ]
    genes = insert_uuid4(
        pd.read_csv(file, delimiter='\t', header=None, names=columns))

    with db.engine.begin() as connection:
        if truncate:
            connection.execute('''truncate public.gene''')
        save_dataframe_using_copy(connection, genes, 'public', 'gene')
def x06_gwas(file, truncate):
    """Import a GWAS file into ``public.genetic_gwas``.

    ``file`` is read as a tab-separated table without a header row using a
    fixed fifteen-column layout.  The frame is passed through
    ``insert_uuid4`` (defined elsewhere) and then copied into the table.

    Args:
        file: Path or buffer handed to ``pandas.read_csv``.
        truncate: When truthy, the table is truncated before the copy,
            inside the same ``engine.begin()`` block.
    """
    columns = [
        'gene_id',
        'gene',
        'band',
        'variant',
        'ensembl',
        'major_allele',
        'minor_allele',
        'p_value',
        'beta',
        'context',
        'cadd',
        'initial_sample_size',
        # NOTE(review): looks like a typo for "replication"; left unchanged
        # because it presumably matches the table's column name -- confirm.
        'peplication_sample_size',
        'pubmed',
        'disease',
    ]
    associations = pd.read_csv(
        file, delimiter='\t', header=None, names=columns)
    associations = insert_uuid4(associations)

    with db.engine.begin() as connection:
        if truncate:
            connection.execute('''truncate public.genetic_gwas''')
        save_dataframe_using_copy(
            connection, associations, 'public', 'genetic_gwas')
def x01_snp_gene_summary(file, truncate):
    """Import a SNP/gene summary file into ``public.summary``.

    ``file`` is read as a tab-separated table without a header row using a
    fixed thirteen-column layout.  The frame is passed through
    ``insert_uuid4`` (defined elsewhere) and then copied into the table.

    Args:
        file: Path or buffer handed to ``pandas.read_csv``.
        truncate: When truthy, the table is truncated before the copy,
            inside the same ``engine.begin()`` block.
    """
    columns = [
        'snpid',
        'position_hg38',
        'major_allele',
        'variant',
        'protein',
        'polyPhen',
        'cadd',
        'sift',
        'gerp',
        'gene',
        'ensembl',
        'dbsnp',
        'gnomad',
    ]
    snp_summary = insert_uuid4(
        pd.read_csv(file, delimiter='\t', header=None, names=columns))

    with db.engine.begin() as connection:
        if truncate:
            connection.execute('''truncate public.summary''')
        save_dataframe_using_copy(
            connection, snp_summary, 'public', 'summary')
def x03_gene_expression(file, limit, truncate):
    """Import a gene expression file into ``public.gene_expression``.

    The file is parsed by ``read_gene_expression`` and passed through
    ``insert_uuid4`` (both defined elsewhere).  If ``limit`` is given, only
    rows whose ``gene`` column equals it are kept.

    Args:
        file: Input handed unchanged to ``read_gene_expression``.
        limit: Optional gene value to restrict the import to; it is
            concatenated to the echoed message, so it is expected to be a
            string.  Falsy values disable the filter.
        truncate: When truthy, the table is truncated before the copy,
            inside the same ``engine.begin()`` block.
    """
    expression = insert_uuid4(read_gene_expression(file))
    # TODO: the data set is too large and should be imported in batches,
    # e.g. gene = gene.loc[10000001:20175936]

    if limit:
        click.echo('Limit data to: ' + limit)
        matches_limit = expression['gene'] == limit
        expression = expression[matches_limit]

    with db.engine.begin() as connection:
        if truncate:
            connection.execute('''truncate public.gene_expression''')
        save_dataframe_using_copy(
            connection, expression, 'public', 'gene_expression')
def x19_go(file, truncate):
    """Import a GO enrichment file into ``public.disease_go``.

    ``file`` is read as a tab-separated table without a header row using a
    fixed thirteen-column layout.  The frame is passed through
    ``insert_dis_type`` (which also receives ``file``) and then
    ``insert_uuid4`` (both defined elsewhere) before being copied into the
    table.

    Args:
        file: Path handed to ``pandas.read_csv`` and ``insert_dis_type``.
        truncate: When truthy, the table is truncated before the copy,
            inside the same ``engine.begin()`` block.
    """
    columns = [
        'category',
        'term',
        'count',
        'percent',
        'p_value',
        'genes',
        'list_total',
        'pop_hits',
        'pop_total',
        'fold_enrichment',
        'bonferroni',
        'benjamini',
        'fdr',
    ]
    enrichment = pd.read_csv(file, delimiter='\t', header=None, names=columns)
    enrichment = insert_uuid4(insert_dis_type(enrichment, file))

    with db.engine.begin() as connection:
        if truncate:
            connection.execute('''truncate public.disease_go''')
        save_dataframe_using_copy(
            connection, enrichment, 'public', 'disease_go')
def x14_mouse_expression(file, truncate):
    """Import a mouse expression file into ``public.expression_mouse``.

    ``file`` is read as a tab-separated table without a header row using a
    fixed twenty-column layout.  The frame is passed through
    ``insert_uuid4`` (defined elsewhere) and then copied into the table.

    Args:
        file: Path or buffer handed to ``pandas.read_csv``.
        truncate: When truthy, the table is truncated before the copy,
            inside the same ``engine.begin()`` block.
    """
    columns = [
        'gene',
        'human',
        'e10_5',
        'e11_5',
        'e12_5',
        'e13_5',
        'web1_e10_5__12_5',
        'web2_e11_5__13_5',
        'p8_a',
        'p8_b',
        'p12_a',
        'p12_b',
        'p20_a',
        'p20_b',
        'p42_a',
        'p42_b',
        'p52_a',
        'p52_b',
        'web_p10_11_12_a',
        'web_p10_11_12_b',
    ]
    expression = insert_uuid4(
        pd.read_csv(file, delimiter='\t', header=None, names=columns))

    with db.engine.begin() as connection:
        if truncate:
            connection.execute('''truncate public.expression_mouse''')
        save_dataframe_using_copy(
            connection, expression, 'public', 'expression_mouse')
def x27_tissue_correct(file, tissue, truncate):
    """Import a tissue gene-module file into ``public.gene_tissue_correct``.

    ``file`` is read with its first row as the header.  The frame is passed
    through ``insert_uuid4`` (defined elsewhere), the columns ``Symbol``,
    ``Module`` and ``KME`` are renamed to ``gene``, ``module`` and ``kme``,
    and a ``tissue`` column holding the given value is added before the
    frame is copied into the table.

    Args:
        file: Path or buffer handed to ``pandas.read_csv``.
        tissue: Value stored in the ``tissue`` column of every row.
        truncate: When truthy, the target table is truncated before the
            copy, inside the same ``engine.begin()`` block.
    """
    # ``sep`` and ``delimiter`` are aliases; passing both (as was done here)
    # is rejected by recent pandas releases.  Older releases let
    # ``delimiter`` win, so that value is the one kept.
    # NOTE(review): confirm the input really is space-delimited, not tabbed.
    tis = pd.read_csv(file, delimiter=' ')
    tis = insert_uuid4(tis)
    tis = tis.rename(columns={
        'Symbol': 'gene',
        'Module': 'module',
        'KME': 'kme',
    })
    tis['tissue'] = tissue

    engine = db.engine
    with engine.begin() as conn:
        if truncate:
            # Truncate the table that is actually loaded below.  This used to
            # truncate public.disease_go, which wiped an unrelated table and
            # left stale rows in gene_tissue_correct.
            conn.execute('''truncate public.gene_tissue_correct''')
        save_dataframe_using_copy(conn, tis, 'public', 'gene_tissue_correct')
def x20_tissue_significance(file, truncate):
    """Import per-tissue gene significance into ``public.tissue_gene_significance``.

    ``file`` is read as a tab-separated table without a header row using a
    fixed eleven-column layout.  The frame is passed through
    ``insert_uuid4`` (defined elsewhere) and then copied into the table.

    Args:
        file: Path or buffer handed to ``pandas.read_csv``.
        truncate: When truthy, the table is truncated before the copy,
            inside the same ``engine.begin()`` block.
    """
    columns = [
        'gene_symbol',
        'corneas',
        'corneal_endothelial_cells',
        'retina',
        'retina_macula',
        'retina_non_macula',
        'rpe_macula',
        'rpe_non_macula',
        'retinal_endothelial_cells',
        'ipsc_derived_retinal_organoids',
        'trabecular_meshwork_cells',
    ]
    significance = pd.read_csv(
        file, delimiter='\t', header=None, names=columns)
    significance = insert_uuid4(significance)

    with db.engine.begin() as connection:
        if truncate:
            connection.execute('''truncate public.tissue_gene_significance''')
        save_dataframe_using_copy(
            connection, significance, 'public', 'tissue_gene_significance')
def x15_single_markers(file, truncate):
    """Import a single-cell marker file into ``public.markers_cell``.

    ``file`` is read as a tab-separated table without a header row.  The
    column layout depends on the file name: a file whose base name (up to
    the first dot) is ``GSE107618`` has no ``p_val_adj`` column, every other
    file has it between ``avg`` and ``pct1``.  The frame is passed through
    ``insert_uuid4`` and ``insert_file_marker`` (both defined elsewhere; the
    latter also receives ``file``) and then copied into the table.

    Args:
        file: Path handed to ``pandas.read_csv`` and ``insert_file_marker``.
        truncate: When truthy, the table is truncated before the copy,
            inside the same ``engine.begin()`` block.
    """
    dataset = os.path.basename(file).split('.')[0]

    columns = ['gene', 'cluster', 'labels', 'p_val', 'avg']
    if dataset != 'GSE107618':
        columns.append('p_val_adj')
    columns.extend(['pct1', 'pct2'])

    marker_frame = pd.read_csv(
        file, delimiter='\t', header=None, names=columns)
    marker_frame = insert_file_marker(insert_uuid4(marker_frame), file)

    with db.engine.begin() as connection:
        if truncate:
            connection.execute('''truncate public.markers_cell''')
        save_dataframe_using_copy(
            connection, marker_frame, 'public', 'markers_cell')
def x13_rna_disease(file, truncate):
    """Import the disease columns of an RNA-seq expression file.

    ``file`` is read as a tab-separated table without a header row, using a
    fixed 18-column layout.  Only the two identifier columns and the six
    disease columns are retained; the frame is then passed through
    ``insert_uuid4`` (defined elsewhere) and copied into
    ``public.expression_rna_seq_disease``.

    Args:
        file: Path or buffer handed to ``pandas.read_csv``.
        truncate: When truthy, the target table is truncated before the
            copy, inside the same ``engine.begin()`` block.
    """
    id_columns = ['gene', 'ensembl_id']
    # Present in the input file but not stored in the disease table.
    skipped_columns = [
        'corneas',
        'corneal_endothelial_cells',
        'retina',
        'retina_macula',
        'retina_non_macula',
        'rpe_macula',
        'rpe_non_macula',
        'retinal_endothelial_cells',
        'ipsc_derived_retinal_organoids',
        'trabecular_meshwork_cells',
    ]
    disease_columns = [
        'age_related_macular_degeneration',
        'diabetic_retinopathy',
        'keratoconus',
        'primary_open_angle_glaucoma',
        'retinitis_pigmentosa',
        'retinoblastoma',
    ]

    frame = pd.read_csv(
        file,
        delimiter='\t',
        header=None,
        names=id_columns + skipped_columns + disease_columns,
    )
    frame = insert_uuid4(frame[id_columns + disease_columns])

    with db.engine.begin() as connection:
        if truncate:
            connection.execute('''truncate public.expression_rna_seq_disease''')
        save_dataframe_using_copy(
            connection, frame, 'public', 'expression_rna_seq_disease')