def to_json_file(conn, result_dir, output_dir):
    """Dump the expression values of every cluster to cluster_expressions.json.

    The clusters of the highest iteration stored in ``row_members`` are
    processed one by one: the names of the member rows and member columns
    are looked up, the matching slice of the matrix returned by
    ``read_ratios(result_dir)`` is taken, and every cell becomes one
    ``{"gene", "condition", "value"}`` record.  The mapping from cluster
    number (as a string) to its record list is written as JSON.

    Args:
        conn: database connection offering ``cursor()``; the statements
            use ``?`` placeholders.
        result_dir: handed to ``read_ratios`` to load the ratio matrix.
        output_dir: directory in which ``cluster_expressions.json`` is
            created.
    """
    gene_query = (
        "select name from row_names rn join row_members rm "
        "on rn.order_num=rm.order_num where cluster=? and iteration=?"
    )
    cond_query = (
        "select name from column_names cn join column_members cm "
        "on cn.order_num=cm.order_num where cluster=? and iteration=?"
    )

    ratios = read_ratios(result_dir)
    cur = conn.cursor()

    # Only the most recent iteration is exported.
    cur.execute("select max(iteration) from row_members")
    last_iteration = cur.fetchone()[0]

    cur.execute(
        "select distinct cluster from row_members where iteration=?",
        [last_iteration],
    )
    cluster_ids = [rec[0] for rec in cur.fetchall()]

    expressions = {}
    for cluster_id in cluster_ids:
        cur.execute(gene_query, [cluster_id, last_iteration])
        gene_names = [rec[0] for rec in cur.fetchall()]

        cur.execute(cond_query, [cluster_id, last_iteration])
        cond_names = [rec[0] for rec in cur.fetchall()]

        # Positional access below relies on the slice keeping the
        # requested row/column order.
        matrix = ratios.loc[gene_names, cond_names].values
        records = []
        for row_idx, gene_name in enumerate(gene_names):
            for col_idx, cond_name in enumerate(cond_names):
                records.append({
                    "gene": gene_name,
                    "condition": cond_name,
                    "value": matrix[row_idx, col_idx],
                })
        expressions[str(cluster_id)] = records

    # Serialise first so that a failure does not leave an empty file behind.
    serialized = json.dumps(expressions)
    target_path = os.path.join(output_dir, "cluster_expressions.json")
    with open(target_path, "w") as out:
        out.write(serialized)
def generate_plots(session, result_dir, output_dir):
    """Save one expression plot per cluster of the latest iteration.

    For each cluster, every member row is drawn as a line whose first
    points are the cluster's own conditions followed by all remaining
    conditions; each value is passed through ``normalize_js`` first.  A red
    dashed vertical line at ``len(cluster_conds)`` separates the two groups.
    Each plot is stored with ``plt.savefig`` as ``exp-<cluster>`` in
    ``output_dir``.

    Args:
        session: SQLAlchemy session used to query the ``cm2db`` models.
        result_dir: handed to ``read_ratios`` to load the ratio matrix.
        output_dir: directory receiving the plot files.
    """
    ratios = read_ratios(result_dir)

    # Fetch the latest iteration as a plain value once, instead of embedding
    # an unevaluated Query as a subquery in every filter below.
    iteration = session.query(func.max(cm2db.RowMember.iteration)).scalar()
    clusters = [
        r[0] for r in session.query(cm2db.RowMember.cluster).distinct().filter(
            cm2db.RowMember.iteration == iteration)
    ]

    # Loop-invariant: the full condition list does not depend on the cluster.
    all_conds = [c[0] for c in session.query(cm2db.ColumnName.name).distinct()]

    # Reduce to single scalars (assuming ratios is a pandas DataFrame, a
    # lone .min()/.max() yields one value per column, which would make the
    # separator plot call draw one overlapping line per column).
    min_value = ratios.min().min()
    max_value = ratios.max().max()

    figure = plt.figure(figsize=(6, 3))
    try:
        for cluster in clusters:
            # The same figure is reused, so wipe the previous cluster's plot.
            plt.clf()
            plt.cla()
            genes = [
                r.row_name.name
                for r in session.query(cm2db.RowMember).filter(
                    and_(cm2db.RowMember.cluster == cluster,
                         cm2db.RowMember.iteration == iteration))
            ]
            cluster_conds = [
                c.column_name.name
                for c in session.query(cm2db.ColumnMember).filter(
                    and_(cm2db.ColumnMember.cluster == cluster,
                         cm2db.ColumnMember.iteration == iteration))
            ]
            # Build the lookup set once per cluster rather than per condition.
            in_cluster = set(cluster_conds)
            non_cluster_conds = [
                cond for cond in all_conds if cond not in in_cluster
            ]

            cluster_data = ratios.loc[genes, cluster_conds]
            non_cluster_data = ratios.loc[genes, non_cluster_conds]

            for gene in genes:
                values = [
                    normalize_js(val)
                    for val in cluster_data.loc[gene, :].values
                ]
                values += [
                    normalize_js(val)
                    for val in non_cluster_data.loc[gene, :].values
                ]
                plt.plot(values)

            # plot the "in"/"out" separator line
            cut_line = len(cluster_conds)
            plt.plot([cut_line, cut_line], [min_value, max_value],
                     color='red', linestyle='--', linewidth=1)
            plt.savefig(os.path.join(output_dir, "exp-%d" % cluster))
    finally:
        # Release the figure even if a query or plot call fails.
        plt.close(figure)
def to_json_file(conn, result_dir, output_dir):
    """Export per-cluster expression values as cluster_expressions.json.

    Uses the highest iteration present in ``row_members``.  For each
    distinct cluster of that iteration, the member row names and member
    column names are fetched, the corresponding part of the matrix returned
    by ``read_ratios(result_dir)`` is selected, and each cell is emitted as
    a ``{'gene', 'condition', 'value'}`` dictionary.  The output maps the
    cluster number (converted to a string) to its list of dictionaries.

    Args:
        conn: database connection offering ``cursor()``; the statements
            use ``?`` placeholders.
        result_dir: handed to ``read_ratios`` to load the ratio matrix.
        output_dir: directory in which ``cluster_expressions.json`` is
            created.
    """
    ratios = read_ratios(result_dir)
    cursor = conn.cursor()

    def first_column(sql, params):
        """Run *sql* with *params* and return the first column of all rows."""
        cursor.execute(sql, params)
        return [record[0] for record in cursor.fetchall()]

    cursor.execute('select max(iteration) from row_members')
    iteration = cursor.fetchone()[0]
    cluster_ids = first_column(
        'select distinct cluster from row_members where iteration=?',
        [iteration])

    result = {}
    for cluster_id in cluster_ids:
        members = [cluster_id, iteration]
        genes = first_column(
            'select name from row_names rn join row_members rm '
            'on rn.order_num=rm.order_num where cluster=? and iteration=?',
            members)
        conds = first_column(
            'select name from column_names cn join column_members cm '
            'on cn.order_num=cm.order_num where cluster=? and iteration=?',
            members)

        # Cells are addressed by position, following the order of
        # genes/conds used for the slice.
        grid = ratios.loc[genes, conds].values
        result[str(cluster_id)] = [
            {'gene': gene, 'condition': cond, 'value': grid[row, col]}
            for row, gene in enumerate(genes)
            for col, cond in enumerate(conds)
        ]

    # Serialise before opening the file so a failure creates no file.
    payload = json.dumps(result)
    destination = os.path.join(output_dir, 'cluster_expressions.json')
    with open(destination, 'w') as out:
        out.write(payload)
def cluster_expressions_to_json_file(session, result_dir, output_dir):
    """Write the expression values of every cluster to cluster_expressions.json.

    Clusters are those of the highest ``RowMember.iteration``.  For each
    one, the names of its row members and column members are collected, the
    matching slice of the matrix returned by ``read_ratios(result_dir)`` is
    taken, and each cell becomes a ``{'gene', 'condition', 'value'}``
    record.  The JSON document maps the cluster number (as a string) to its
    list of records.

    Args:
        session: SQLAlchemy session used to query the ``cm2db`` models.
        result_dir: handed to ``read_ratios`` to load the ratio matrix.
        output_dir: directory in which ``cluster_expressions.json`` is
            created.
    """
    ratios = read_ratios(result_dir)

    # Fetch the latest iteration as a plain value once.  Leaving it as a
    # Query object makes every filter below embed it as a subquery that
    # relies on implicit coercion.
    iteration = session.query(func.max(cm2db.RowMember.iteration)).scalar()
    clusters = [
        r[0] for r in session.query(cm2db.RowMember.cluster).distinct().filter(
            cm2db.RowMember.iteration == iteration)
    ]

    result = {}
    for cluster in clusters:
        genes = [
            r.row_name.name for r in session.query(cm2db.RowMember).filter(
                and_(cm2db.RowMember.cluster == cluster,
                     cm2db.RowMember.iteration == iteration))
        ]
        cluster_conds = [
            c.column_name.name
            for c in session.query(cm2db.ColumnMember).filter(
                and_(cm2db.ColumnMember.cluster == cluster,
                     cm2db.ColumnMember.iteration == iteration))
        ]

        # Materialise the value matrix once instead of once per cell.
        cluster_values = ratios.loc[genes, cluster_conds].values
        result[str(cluster)] = [
            {
                'gene': gene,
                'condition': cond,
                'value': cluster_values[rindex, cindex]
            }
            for rindex, gene in enumerate(genes)
            for cindex, cond in enumerate(cluster_conds)
        ]

    # Serialise before opening the file so a failure creates no file.
    buffer = json.dumps(result)
    with open(os.path.join(output_dir, 'cluster_expressions.json'), 'w') as out:
        out.write(buffer)
def generate_plots(conn, result_dir, output_dir):
    """Save one expression plot per cluster of the latest iteration.

    For each cluster, every member row is drawn as a line whose first
    points are the cluster's own conditions followed by all remaining
    conditions; each value is passed through ``normalize_js`` first.  A red
    dashed vertical line at ``len(cluster_conds)`` separates the two groups.
    Each plot is stored with ``plt.savefig`` as ``exp-<cluster>`` in
    ``output_dir``.

    Args:
        conn: database connection offering ``cursor()``; the statements
            use ``?`` placeholders.
        result_dir: handed to ``read_ratios`` to load the ratio matrix.
        output_dir: directory receiving the plot files.
    """
    ratios = read_ratios(result_dir)
    cursor = conn.cursor()

    cursor.execute('select max(iteration) from row_members')
    iteration = cursor.fetchone()[0]
    cursor.execute(
        'select distinct cluster from row_members where iteration=?',
        [iteration])
    clusters = [row[0] for row in cursor.fetchall()]

    # Loop-invariant: the full condition list does not depend on the cluster.
    cursor.execute('select distinct name from column_names')
    all_conds = [row[0] for row in cursor.fetchall()]

    # Reduce to single scalars (assuming ratios is a pandas DataFrame, a
    # lone .min()/.max() yields one value per column, which would make the
    # separator plot call draw one overlapping line per column).
    min_value = ratios.min().min()
    max_value = ratios.max().max()

    figure = plt.figure(figsize=(6, 3))
    try:
        for cluster in clusters:
            # The same figure is reused, so wipe the previous cluster's plot.
            plt.clf()
            plt.cla()
            cursor.execute(
                'select distinct name from row_members rm join row_names rn on rm.order_num=rn.order_num where cluster=? and iteration=?',
                [cluster, iteration])
            genes = [row[0] for row in cursor.fetchall()]
            cursor.execute(
                'select distinct name from column_members cm join column_names cn on cm.order_num=cn.order_num where cluster=? and iteration=?',
                [cluster, iteration])
            cluster_conds = [row[0] for row in cursor.fetchall()]

            # Build the lookup set once per cluster rather than per condition.
            in_cluster = set(cluster_conds)
            non_cluster_conds = [
                cond for cond in all_conds if cond not in in_cluster
            ]

            cluster_data = ratios.loc[genes, cluster_conds]
            non_cluster_data = ratios.loc[genes, non_cluster_conds]

            for gene in genes:
                values = [
                    normalize_js(val)
                    for val in cluster_data.loc[gene, :].values
                ]
                values += [
                    normalize_js(val)
                    for val in non_cluster_data.loc[gene, :].values
                ]
                plt.plot(values)

            # plot the "in"/"out" separator line
            cut_line = len(cluster_conds)
            plt.plot([cut_line, cut_line], [min_value, max_value],
                     color='red', linestyle='--', linewidth=1)
            plt.savefig(os.path.join(output_dir, "exp-%d" % cluster))
    finally:
        # Release the figure even if a query or plot call fails.
        plt.close(figure)
def generate_plots(session, result_dir, output_dir):
    """Save one expression plot per cluster of the latest iteration.

    For each cluster, every member row is drawn as a line whose first
    points are the cluster's own conditions followed by all remaining
    conditions; each value is passed through ``normalize_js`` first.  A red
    dashed vertical line at ``len(cluster_conds)`` separates the two groups.
    Each plot is stored with ``plt.savefig`` as ``exp-<cluster>`` in
    ``output_dir``.

    Args:
        session: SQLAlchemy session used to query the ``cm2db`` models.
        result_dir: handed to ``read_ratios`` to load the ratio matrix.
        output_dir: directory receiving the plot files.
    """
    ratios = read_ratios(result_dir)

    # Fetch the latest iteration as a plain value once, instead of embedding
    # an unevaluated Query as a subquery in every filter below.
    iteration = session.query(func.max(cm2db.RowMember.iteration)).scalar()
    clusters = [
        r[0] for r in session.query(cm2db.RowMember.cluster).distinct().filter(
            cm2db.RowMember.iteration == iteration)
    ]

    # Loop-invariant: the full condition list does not depend on the cluster.
    all_conds = [
        c[0] for c in session.query(cm2db.ColumnName.name).distinct()
    ]

    # Reduce to single scalars (assuming ratios is a pandas DataFrame, a
    # lone .min()/.max() yields one value per column, which would make the
    # separator plot call draw one overlapping line per column).
    min_value = ratios.min().min()
    max_value = ratios.max().max()

    figure = plt.figure(figsize=(6, 3))
    try:
        for cluster in clusters:
            # The same figure is reused, so wipe the previous cluster's plot.
            plt.clf()
            plt.cla()
            genes = [
                r.row_name.name
                for r in session.query(cm2db.RowMember).filter(
                    and_(cm2db.RowMember.cluster == cluster,
                         cm2db.RowMember.iteration == iteration))
            ]
            cluster_conds = [
                c.column_name.name
                for c in session.query(cm2db.ColumnMember).filter(
                    and_(cm2db.ColumnMember.cluster == cluster,
                         cm2db.ColumnMember.iteration == iteration))
            ]
            # Build the lookup set once per cluster rather than per condition.
            in_cluster = set(cluster_conds)
            non_cluster_conds = [
                cond for cond in all_conds if cond not in in_cluster
            ]

            cluster_data = ratios.loc[genes, cluster_conds]
            non_cluster_data = ratios.loc[genes, non_cluster_conds]

            for gene in genes:
                values = [
                    normalize_js(val)
                    for val in cluster_data.loc[gene, :].values
                ]
                values += [
                    normalize_js(val)
                    for val in non_cluster_data.loc[gene, :].values
                ]
                plt.plot(values)

            # plot the "in"/"out" separator line
            cut_line = len(cluster_conds)
            plt.plot([cut_line, cut_line], [min_value, max_value],
                     color='red', linestyle='--', linewidth=1)
            plt.savefig(os.path.join(output_dir, "exp-%d" % cluster))
    finally:
        # Release the figure even if a query or plot call fails.
        plt.close(figure)
def cluster_expressions_to_json_file(session, result_dir, output_dir):
    """Write the expression values of every cluster to cluster_expressions.json.

    Clusters are those of the highest ``RowMember.iteration``.  For each
    one, the names of its row members and column members are collected, the
    matching slice of the matrix returned by ``read_ratios(result_dir)`` is
    taken, and each cell becomes a ``{'gene', 'condition', 'value'}``
    record.  The JSON document maps the cluster number (as a string) to its
    list of records.

    Args:
        session: SQLAlchemy session used to query the ``cm2db`` models.
        result_dir: handed to ``read_ratios`` to load the ratio matrix.
        output_dir: directory in which ``cluster_expressions.json`` is
            created.
    """
    ratios = read_ratios(result_dir)

    # Fetch the latest iteration as a plain value once.  Leaving it as a
    # Query object makes every filter below embed it as a subquery that
    # relies on implicit coercion.
    iteration = session.query(func.max(cm2db.RowMember.iteration)).scalar()
    clusters = [r[0] for r in session.query(cm2db.RowMember.cluster).distinct().filter(
        cm2db.RowMember.iteration == iteration)]

    result = {}
    for cluster in clusters:
        genes = [r.row_name.name for r in session.query(cm2db.RowMember).filter(
            and_(cm2db.RowMember.cluster == cluster,
                 cm2db.RowMember.iteration == iteration))]
        cluster_conds = [c.column_name.name for c in session.query(cm2db.ColumnMember).filter(
            and_(cm2db.ColumnMember.cluster == cluster,
                 cm2db.ColumnMember.iteration == iteration))]

        # Materialise the value matrix once instead of once per cell.
        cluster_values = ratios.loc[genes, cluster_conds].values
        result[str(cluster)] = [
            {'gene': gene, 'condition': cond,
             'value': cluster_values[rindex, cindex]}
            for rindex, gene in enumerate(genes)
            for cindex, cond in enumerate(cluster_conds)
        ]

    # Serialise before opening the file so a failure creates no file.
    buffer = json.dumps(result)
    with open(os.path.join(output_dir, 'cluster_expressions.json'), 'w') as out:
        out.write(buffer)
def generate_plots(conn, result_dir, output_dir):
    """Save one expression plot per cluster of the latest iteration.

    For each cluster, every member row is drawn as a line whose first
    points are the cluster's own conditions followed by all remaining
    conditions; each value is passed through ``normalize_js`` first.  A red
    dashed vertical line at ``len(cluster_conds)`` separates the two groups.
    Each plot is stored with ``plt.savefig`` as ``exp-<cluster>`` in
    ``output_dir``.

    Args:
        conn: database connection offering ``cursor()``; the statements
            use ``?`` placeholders.
        result_dir: handed to ``read_ratios`` to load the ratio matrix.
        output_dir: directory receiving the plot files.
    """
    ratios = read_ratios(result_dir)
    cursor = conn.cursor()

    cursor.execute('select max(iteration) from row_members')
    iteration = cursor.fetchone()[0]
    cursor.execute('select distinct cluster from row_members where iteration=?',
                   [iteration])
    clusters = [row[0] for row in cursor.fetchall()]

    # Loop-invariant: the full condition list does not depend on the cluster.
    cursor.execute('select distinct name from column_names')
    all_conds = [row[0] for row in cursor.fetchall()]

    # Reduce to single scalars (assuming ratios is a pandas DataFrame, a
    # lone .min()/.max() yields one value per column, which would make the
    # separator plot call draw one overlapping line per column).
    min_value = ratios.min().min()
    max_value = ratios.max().max()

    figure = plt.figure(figsize=(6, 3))
    try:
        for cluster in clusters:
            # The same figure is reused, so wipe the previous cluster's plot.
            plt.clf()
            plt.cla()
            cursor.execute('select distinct name from row_members rm join row_names rn on rm.order_num=rn.order_num where cluster=? and iteration=?',
                           [cluster, iteration])
            genes = [row[0] for row in cursor.fetchall()]
            cursor.execute('select distinct name from column_members cm join column_names cn on cm.order_num=cn.order_num where cluster=? and iteration=?',
                           [cluster, iteration])
            cluster_conds = [row[0] for row in cursor.fetchall()]

            # Build the lookup set once per cluster rather than per condition.
            in_cluster = set(cluster_conds)
            non_cluster_conds = [cond for cond in all_conds
                                 if cond not in in_cluster]

            cluster_data = ratios.loc[genes, cluster_conds]
            non_cluster_data = ratios.loc[genes, non_cluster_conds]

            for gene in genes:
                values = [normalize_js(val)
                          for val in cluster_data.loc[gene, :].values]
                values += [normalize_js(val)
                           for val in non_cluster_data.loc[gene, :].values]
                plt.plot(values)

            # plot the "in"/"out" separator line
            cut_line = len(cluster_conds)
            plt.plot([cut_line, cut_line], [min_value, max_value],
                     color='red', linestyle='--', linewidth=1)
            plt.savefig(os.path.join(output_dir, "exp-%d" % cluster))
    finally:
        # Release the figure even if a query or plot call fails.
        plt.close(figure)