import pandas as pd from db_engine import create from arrays import make_array from numpy.linalg import norm engine = create() # database connection created using SQLAlchemy library # Count distance on genome engine.execute( "ALTER TABLE sahlen_promoter_enhancer ADD COLUMN genome_distance INT") engine.execute( "UPDATE sahlen_promoter_enhancer SET genome_distance = " "ABS(Fragment_end_coordinate-(Fragment_end_coordinate-Fragment_start_coordinate)/2 " # enhancer middle "- Promoter_TSS) WHERE Promoter_chr=Fragment_chromosome") # Count degree of sequences engine.execute( "CREATE TABLE sahlen_frag_per_promo " "SELECT Promoter_chr, Promoter_TSS, new_promo_start, new_promo_end, " "count(distinct Fragment_chromosome, new_enh_start, new_enh_end) " "AS degree " "FROM sahlen_promoter_enhancer " "GROUP BY Promoter_chr, new_promo_start, new_promo_end") engine.execute( "CREATE TABLE sahlen_promo_per_frag " "SELECT Fragment_chromosome, Fragment_start_coordinate, Fragment_end_coordinate, " "new_enh_start, new_enh_end, " "count(distinct Promoter_chr, new_promo_start, new_promo_end) "
# Promoter-promoter preprocessing for the "sahlen" dataset.
#
# Builds two summary tables from `sahlen_promoter_promoter`, adds an empty
# `kmer_distance` column to it, and loads the promoter-pair coordinates into
# a DataFrame whose column names are normalised to snake_case.
#
# NOTE(review): the CREATE TABLE / ALTER TABLE statements are not guarded, so
# they are expected to fail if the tables/column already exist - confirm this
# script is only meant to run once against a fresh database.

import pandas as pd
from numpy.linalg import norm

from arrays import make_array
from db_engine import create

engine = create()

# For every promoter (chromosome + start/end), count the distinct partner
# promoters it is paired with.
# `con=` is a pandas.read_sql keyword; it must not be passed to
# engine.execute, which would bind it as a statement parameter.
engine.execute(
    "CREATE TABLE sahlen_promo_per_promo SELECT `Promoter chr`, new_promo_start, new_promo_end, "
    "count(distinct `Promoter chr.1`, new_promo2_start, new_promo2_end) as "
    "num from sahlen_promoter_promoter group by `Promoter chr`, new_promo_start, new_promo_end"
)

# For every promoter TSS, store the span (max - min) of the partner TSS values.
engine.execute(
    "CREATE TABLE sahlen_range_promo_per_promo "
    "SELECT MAX(`Promoter TSS.1`)-MIN(`Promoter TSS.1`) "
    "AS promo_range, `Promoter TSS`, `Promoter chr` "
    "FROM sahlen_promoter_promoter "
    "GROUP BY `Promoter chr`, `Promoter TSS`"
)

dataset = "sahlen"
path = "/home/kinga/Dokumenty/Studia/licencjat_old/data/"

# Column that will hold the k-mer distance of each promoter pair.
engine.execute("ALTER TABLE sahlen_promoter_promoter ADD kmer_distance FLOAT")

# Load the coordinates of every promoter pair.
# Optional filter that was left disabled in the query:
#   where kmer_distance is null or kmer_distance=0
sahlen = pd.read_sql(
    "SELECT new_promo_start, new_promo_end, new_promo2_start, new_promo2_end, "
    "`Promoter chr`, `Promoter chr.1` from "
    "sahlen_promoter_promoter ",
    con=engine,
)

# Normalise column names: trim, lower-case, spaces -> underscores, and drop
# parentheses and dots (e.g. "Promoter chr.1" -> "promoter_chr1").
# regex=False keeps "(", ")" and "." literal regardless of the pandas
# version's default for `regex`.
sahlen.columns = (
    sahlen.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace("(", "", regex=False)
    .str.replace(")", "", regex=False)
    .str.replace(".", "", regex=False)
)