Exemplo n.º 1
0
 # NOTE(review): excerpt of a larger routine; `associations`, `df_to_digest`,
 # `handler` and `value_col_name` are defined outside the visible fragment.
 # For the "gene_host" column the associations are rebuilt from the column's
 # own values instead of using whatever was assigned before this point.
 if col_name_with_keywords == "gene_host":
     associations = digestAssociationsKeeper.generate_genera_dict(
         df_to_digest[col_name_with_keywords].values.tolist())
 # digest_df() returns a pair: the digested table and a raw table.
 digest_df, raw_ds = digestAssociationsKeeper.digest_df(
     df_to_digest,
     associations=associations,
     columns_with_keywords=[col_name_with_keywords])
 # Attach the raw label column of the annotated pivot to the raw table,
 # joining on REFERENCE_COL_NAME.
 raw_ds = Utilities.left_merge(
     raw_ds,
     handler.raw_annotated_pivot[RAW_LABEL_COL_NAME].reset_index(),
     REFERENCE_COL_NAME)
 # Reduce every raw label to its shortest non-empty space-separated token.
 # NOTE(review): min() raises ValueError when a label yields no tokens
 # (empty or whitespace-only string) - confirm such labels cannot occur.
 raw_ds[RAW_LABEL_COL_NAME] = raw_ds[RAW_LABEL_COL_NAME].apply(
     lambda x: min(
         (j for j in str(x).strip().split(" ") if j), key=len))
 # One pass per column of the digested table (presumably one per sample).
 for sample_name in digest_df.columns:
     # presumably keeps the largest groups and folds the rest together;
     # verify against Utilities.get_n_majors_from_df
     major_digest_df = Utilities.get_n_majors_from_df(
         digest_df, sample_name, n=INNER_DONUT_GROUPS - 1)
     # Path prefix for this sample's exports; makedirs=True is passed through
     # to create_mirrored_path.
     sample_export_mask = InterpretationHandler.create_mirrored_path(
         [
             projectDescriber.DATA_DIGEST_DIR, value_col_name,
             col_name_with_keywords, sample_name
         ],
         makedirs=True)
     # Create visualization
     fig, ax = plt.subplots()
     _BASE_FONT_SIZE = 15
     # NOTE(review): rcParams are updated after the figure already exists, so
     # "figure.figsize" only applies to figures created later, not to `fig`.
     plt.rcParams.update({
         "font.size": _BASE_FONT_SIZE,
         "figure.figsize": (20, 20)
     })
     # Equal axis scaling keeps the pie circular.
     ax.axis("equal")
     # The first column of the majors table is used as the value column name.
     y_col_name = major_digest_df.columns[0]
Exemplo n.º 2
0
 def process(self):
     """Export digest tables and a double-donut chart per keyword column and sample.

     Steps:

     1. Dump the annotated pivot table as ``*_raw_annotated_pivot.tsv``.
     2. For every column named in ``KEYWORDS_ASSOCIATIVE_PAIRS`` digest the
        column into keyword groups and dump the ``*_digest.tsv`` and
        ``*_raw.tsv`` tables.
     3. For every column of the digested table draw a two-ring pie chart:
        the inner ring holds the major digest groups, the outer ring holds
        the major raw entries of each group, coloured like the parent wedge
        with decreasing opacity.  The inner/outer value tables and the PNG
        are written under a per-sample mirrored path.

     Side effects: writes TSV and PNG files, updates ``plt.rcParams`` and
     closes every open matplotlib figure after each chart.
     """
     # Styling shared by all charts; defined once instead of once per sample.
     base_font_size = 15
     wedge_width = 0.3
     minimal_alpha = 0.2
     wedge_properties = dict(width=wedge_width, edgecolor="w")
     label_properties = dict(fontsize=base_font_size,
                             rotation_mode="anchor",
                             verticalalignment="center",
                             horizontalalignment="center")

     def shortest_word(raw_label):
         # Labels may be empty (missing values are filled with "" below);
         # min() would raise ValueError on an empty sequence, so such
         # labels are kept as an empty string.
         words = list(Utilities.remove_empty_values(
             str(raw_label).strip().split(" ")))
         if not words:
             return ""
         return min(words, key=len)

     pivot_export_mask = self.create_mirrored_path(
         [projectDescriber.DATA_DIGEST_DIR, self.value_col_name],
         makedirs=True)
     Utilities.dump_tsv(
         self.raw_annotated_pivot.reset_index(),
         "{}_raw_annotated_pivot.tsv".format(pivot_export_mask))
     for col_name_with_keywords in KEYWORDS_ASSOCIATIVE_PAIRS:
         df_to_digest = self.raw_annotated_pivot.loc[:, [
             col_name_with_keywords
         ] + self.sample_names]
         associations = KEYWORDS_ASSOCIATIVE_PAIRS.get(
             col_name_with_keywords)
         # The host column gets its associations generated from its own
         # values instead of the predefined ones.
         if col_name_with_keywords == HOST_COL_NAME:
             associations = digestAssociationsKeeper.generate_genera_dict(
                 df_to_digest[col_name_with_keywords].values.tolist())
         digest_df, raw_ds = digestAssociationsKeeper.digest_df(
             df_to_digest,
             associations=associations,
             columns_with_keywords=[col_name_with_keywords])
         # Attach the raw labels and shorten each one to its shortest word.
         raw_ds = Utilities.left_merge(
             raw_ds,
             self.raw_annotated_pivot[RAW_LABEL_COL_NAME].reset_index(),
             REFERENCE_COL_NAME).fillna("")
         raw_ds[RAW_LABEL_COL_NAME] = raw_ds[RAW_LABEL_COL_NAME].apply(
             shortest_word)
         keyword_export_mask = self.create_mirrored_path(
             [
                 projectDescriber.DATA_DIGEST_DIR, self.value_col_name,
                 col_name_with_keywords
             ],
             makedirs=True)
         Utilities.dump_tsv(digest_df.reset_index(),
                            "{}_digest.tsv".format(keyword_export_mask))
         Utilities.dump_tsv(raw_ds,
                            "{}_raw.tsv".format(keyword_export_mask))
         for sample_name in digest_df.columns:
             major_digest_df = Utilities.get_n_majors_from_df(
                 digest_df, sample_name, n=INNER_DONUT_GROUPS - 1)
             # Create visualization.  rcParams must be set BEFORE the figure
             # is created, otherwise the very first figure is built with the
             # previous default size.
             plt.rcParams.update({
                 "font.size": base_font_size,
                 "figure.figsize": (20, 20)
             })
             fig, ax = plt.subplots()
             ax.axis("equal")
             y_col_name = major_digest_df.columns[0]
             # Returning value: [[wedges...], [labels...], [values...]]
             pie_int = ax.pie(major_digest_df[sample_name],
                              radius=1 - wedge_width,
                              labels=major_digest_df.index,
                              labeldistance=1 - wedge_width,
                              rotatelabels=False,
                              autopct=self.make_autopct(
                                  major_digest_df[y_col_name]),
                              pctdistance=1 - wedge_width / 2.0,
                              wedgeprops=wedge_properties,
                              textprops=label_properties)
             # Build the outer-ring table group by group, in the order of
             # the inner wedges.  ax.pie() creates one wedge per value, so
             # each digest keyword is paired with its own wedge and a colour
             # is always available for the group.
             group_frames = []
             for digest_keyword, wedge in zip(major_digest_df.index,
                                              pie_int[0]):
                 if digest_keyword == "Other":
                     # The "Other" group has no raw entries: reuse its
                     # digest row as a single outer wedge.
                     group_frame = pd.DataFrame(
                         major_digest_df.loc["Other"]).transpose()
                     group_frame.index.name = DIGEST_LABEL_COL_NAME
                     group_frame = group_frame.reset_index()
                 else:
                     group_raw = raw_ds.loc[
                         raw_ds[DIGEST_LABEL_COL_NAME] == digest_keyword, [
                             REFERENCE_COL_NAME, sample_name,
                             DIGEST_LABEL_COL_NAME, RAW_LABEL_COL_NAME
                         ]]
                     # Major raw entries of the group; the remainder row
                     # named "Other" is renamed after the group itself.
                     group_majors = Utilities.get_n_majors_from_df(
                         group_raw.set_index(REFERENCE_COL_NAME),
                         sample_name,
                         n=OUTER_DONUT_SUBGROUPS - 1).rename(
                             index={"Other": digest_keyword}).reset_index()
                     group_frame = Utilities.left_merge(
                         group_majors, group_raw, REFERENCE_COL_NAME)
                     # The remainder row has no match in the raw table, so
                     # its labels are filled in explicitly.
                     group_frame[RAW_LABEL_COL_NAME] = group_frame[
                         RAW_LABEL_COL_NAME].fillna(
                             "{}_Other".format(digest_keyword))
                     group_frame[DIGEST_LABEL_COL_NAME] = group_frame[
                         DIGEST_LABEL_COL_NAME].fillna("Other")
                 group_size = group_frame.shape[0]
                 if group_size == 0:
                     continue
                 # Fade the parent wedge colour across the rows of the
                 # group.  Row positions (not index labels) drive the fade,
                 # and exactly one colour is produced per row so the column
                 # assignment can never hit a length mismatch.
                 group_color = wedge.get_facecolor()
                 base_alpha = group_color[3]
                 group_colors = []
                 for position in range(group_size):
                     if group_size < 4:
                         # Small groups: fixed step per row.
                         alpha = base_alpha - (
                             base_alpha * position * minimal_alpha)
                     else:
                         # Larger groups: linear fade down to minimal_alpha.
                         alpha = base_alpha - (
                             (base_alpha - minimal_alpha) * position /
                             float(group_size - 1))
                     # Colours are stored as "r;g;b;a" strings so they can
                     # be exported in the TSV table.
                     group_colors.append(";".join(
                         str(i) for i in list(group_color[:3]) + [alpha]))
                 group_frame["color"] = group_colors
                 group_frames.append(group_frame)
             if group_frames:
                 major_raw_ds = pd.concat(group_frames,
                                          axis=0,
                                          ignore_index=True,
                                          sort=False)
             else:
                 major_raw_ds = pd.DataFrame()
             major_raw_ds = major_raw_ds.fillna("Other")
             pie_ext = ax.pie(
                 major_raw_ds[sample_name],
                 radius=1,
                 labels=major_raw_ds[RAW_LABEL_COL_NAME],
                 labeldistance=1 - wedge_width / 2,
                 rotatelabels=True,
                 wedgeprops=wedge_properties,
                 textprops=label_properties,
                 colors=major_raw_ds["color"].apply(lambda x: tuple(
                     float(i) for i in x.split(";"))).values.tolist())
             # Export visualization tables
             sample_export_mask = self.create_mirrored_path(
                 [
                     projectDescriber.DATA_DIGEST_DIR, self.value_col_name,
                     col_name_with_keywords, sample_name
                 ],
                 makedirs=True)
             Utilities.dump_tsv(
                 major_digest_df.reset_index(),
                 "{}_inner_values.tsv".format(sample_export_mask))
             Utilities.dump_tsv(
                 major_raw_ds,
                 "{}_outer_values.tsv".format(sample_export_mask))
             # Set labels
             ax.set_xlabel(y_col_name)
             ax.set_ylabel(self.value_col_name)
             plt.tight_layout()
             # Export PNG
             pie_file = "{}_double_donut.png".format(sample_export_mask)
             fig.suptitle(pie_file, fontsize=base_font_size)
             plt.savefig(pie_file, dpi=300, bbox_inches="tight")
             # Closing is enough to release the figure; calling plt.clf()
             # afterwards would implicitly create a new empty figure.
             plt.close("all")