def get_expression_profile(expression_level, genes, expression_bins, input_format, output_format, species, tmp, symmetric_expression):
    """Discretize expression values and pair them with cleaned gene IDs.

    Steps, in order:

    1. Pair each gene with its expression value, drop pairs whose value
       is NA, and sort the remainder by expression value (ascending, the
       pandas default).
    2. Discretize the sorted values with ``MI.discretize``. When
       ``symmetric_expression`` is truthy, values below zero and values
       at or above zero are discretized separately: the negative side
       gets ``expression_bins // 2`` bins, the non-negative side gets the
       remaining bins, and the non-negative result is shifted up by
       ``expression_bins // 2`` before the two are concatenated.
    3. Truncate every gene ID at its first ``'.'``.
    4. If both ``input_format`` and ``output_format`` are truthy and
       differ, translate the IDs through ``change_accessions``.
    5. Collapse duplicate IDs through a dict (the last value seen for an
       ID wins, at the position of its first occurrence) and drop
       entries whose ID is ``'-'``.

    Args:
        expression_level: Expression values, aligned with ``genes``.
        genes: Gene identifiers (strings), aligned with
            ``expression_level``.
        expression_bins: Total number of bins passed on to
            ``MI.discretize``.
        input_format: Accession format of ``genes``; falsy disables
            conversion.
        output_format: Desired accession format; falsy disables
            conversion.
        species: Forwarded unchanged to ``change_accessions``.
        tmp: Forwarded unchanged to ``change_accessions``.
        symmetric_expression: Whether to bin negative and non-negative
            values separately.

    Returns:
        tuple: ``(expression_profile, genes)`` where
        ``expression_profile`` is a NumPy array of discretized values and
        ``genes`` is the list of matching gene IDs in the same order.
    """
    table = pd.DataFrame({'genes': genes, 'expression_level': expression_level})
    # Keep only measured genes, ordered by their expression value.
    table = table[table['expression_level'].notna()].sort_values(by='expression_level')
    sorted_levels = np.array(table['expression_level'])

    if not symmetric_expression:
        bins = MI.discretize(sorted_levels, expression_bins)
    else:
        lower_bin_count = expression_bins // 2
        negative_bins = MI.discretize(sorted_levels[sorted_levels < 0], lower_bin_count)
        # The non-negative side receives the extra bin when the total is odd.
        non_negative_bins = MI.discretize(
            sorted_levels[sorted_levels >= 0],
            lower_bin_count + expression_bins % 2,
        )
        # Shift so the non-negative labels continue after the negative ones.
        # NOTE(review): in-place add assumes MI.discretize returns a NumPy
        # array -- confirm against MI.
        non_negative_bins += lower_bin_count
        bins = np.concatenate((negative_bins, non_negative_bins))

    # Keep only the part of each identifier before its first dot.
    gene_ids = [identifier.split('.', 1)[0] for identifier in table['genes']]

    if input_format and output_format and input_format != output_format:
        gene_ids = change_accessions(gene_ids, input_format, output_format, species, tmp)

    # The dict collapses repeated identifiers; '-' entries are discarded.
    bin_by_gene = dict(zip(gene_ids, bins))
    retained = [
        (identifier, bin_label)
        for identifier, bin_label in bin_by_gene.items()
        if identifier != '-'
    ]
    expression_profile = np.array([bin_label for _, bin_label in retained])
    genes = [identifier for identifier, _ in retained]
    return expression_profile, genes