def __color_row(sheet: opx.worksheet.worksheet.Worksheet, row: int, color: str,
                fill_type: Optional[str] = "solid") -> None:
    """
    Fill every cell of a single row with one color.

    :param sheet: Sheet that contains the row to color
    :param row: Number of the row to color, e.g. taken from ``cell.row``
    :param color: Fill color in hexadecimal notation
    :param fill_type: Pattern type handed to ``PatternFill``
    :return: None
    """
    # With min_row == max_row every yielded column holds exactly one cell,
    # so this walks the requested row from column 1 to the last used column.
    columns = sheet.iter_cols(min_col=1, max_col=sheet.max_column, min_row=row, max_row=row)
    for cell in (c for column in columns for c in column):
        cell.fill = opx_style.PatternFill(start_color=color, end_color=color, fill_type=fill_type)
def _get_col_dict(self, sheet: pxl.worksheet.worksheet.Worksheet) -> dict:
    """Retrieve column names and their indices under a certain sheet (subsystem)

    The name of a column is the value of its second cell. Columns whose
    second cell is empty are left out of the result, but they still occupy
    a position, so the indices of the other columns are not shifted.

    Args:
        sheet (pxl.worksheet.worksheet.Worksheet): the sheet to retrieve from

    Returns:
        dict: column name as key, zero-based column position as value.
        Empty when the sheet has fewer than two rows.
    """
    col_dict = {}
    columns = sheet.iter_cols(1, sheet.max_column, values_only=True)
    for index, col in enumerate(columns):
        # A sheet with a single row yields one-element columns; there is no
        # name cell to read then, so skip instead of raising IndexError.
        if len(col) < 2:
            continue
        name = col[1]
        if name is not None:
            col_dict[name] = index
    return col_dict
def create_formcell(self, judgement, ws: op.worksheet.worksheet.Worksheet, column: int, row: int) -> None:
    """Write one form into a worksheet cell.

    The cell at (row, column) of ws receives the value that
    ``form_to_cell_value`` derives from ``judgement[0]``. When
    ``judgement[1]`` carries a non-empty "Comment" entry, that text is
    attached to the cell as a comment. The cell is always hyperlinked to
    ``URL_BASE`` formatted with the URL-quoted ``ID`` of ``judgement[0]``.
    """
    # TODO: Use CLDF terms instead of column names, like the c_ elsewhere
    form_cell = ws.cell(
        row=row,
        column=column,
        value=self.form_to_cell_value(judgement[0]),
    )

    note = judgement[1].get("Comment")
    if note:
        form_cell.comment = op.comments.Comment(note, __package__)

    form_cell.hyperlink = self.URL_BASE.format(urllib.parse.quote(judgement[0]['ID']))
def __align(sheet: opx.worksheet.worksheet.Worksheet, row: int, start_col: int, end_col: int,
            horizontal_align: str = "center") -> None:
    """
    Set the horizontal alignment of the cells of one row between two columns.

    :param sheet: Sheet that contains the row
    :param row: Number of the row to align, e.g. taken from ``cell.row``
    :param start_col: First column to align
    :param end_col: Last column to align
    :param horizontal_align: Horizontal alignment handed to ``Alignment``
    :return: None
    """
    # min_row == max_row restricts every yielded column to the one cell
    # that lies in the requested row.
    columns = sheet.iter_cols(min_col=start_col, max_col=end_col, min_row=row, max_row=row)
    for column in columns:
        for cell in column:
            cell.alignment = opx_style.Alignment(horizontal=horizontal_align)
def __add_font_row(sheet: opx.worksheet.worksheet.Worksheet, row: int, font_name: str, font_size: int,
                   font_color: str, font_bold: Optional[bool] = False) -> None:
    """
    Apply one font to every cell of a single row.

    Each cell from column 1 to the last used column of the sheet is handed
    to ``Excel.__add_font`` together with the font settings.

    :param sheet: Sheet that contains the row
    :param row: Row to be given a new font
    :param font_name: Name of the font
    :param font_size: Size of the font
    :param font_color: Color of the font
    :param font_bold: True if the font should be bold else False
    :return: None
    """
    rows = sheet.iter_rows(min_row=row, max_row=row, min_col=1, max_col=sheet.max_column)
    for cells in rows:
        for cell in cells:
            Excel.__add_font(cell, font_name, font_size, font_color, font_bold)
def header_from_cognate_excel(
    ws: openpyxl.worksheet.worksheet.Worksheet, dataset: pycldf.Dataset
):
    """Map the first row of a cognate sheet to CognatesetTable columns.

    Reads the first-row cells, at most as many as the CognatesetTable has
    columns. An empty cell or the title "CogSet" stands for the table's id
    column. Reading stops at the first title the dataset cannot resolve to
    a CognatesetTable column.

    Returns a pair ``(row_header, separators)``: the resolved column names
    and, in the same order, the ``separator`` of each of those columns.
    """
    row_header = []
    separators = []
    first_row_cells = ws.iter_cols(
        min_row=1,
        max_row=1,
        max_col=len(dataset["CognatesetTable"].tableSchema.columns),
    )
    for (header,) in first_row_cells:
        title = header.value
        if title is None or title == "CogSet":
            title = dataset["CognatesetTable", "id"].name
        try:
            column_name = dataset["CognatesetTable", title].name
        except KeyError:
            # Unknown title: everything from here on is not a header column.
            break
        row_header.append(column_name)
        separators.append(dataset["CognatesetTable", column_name].separator)
    return row_header, separators
def pretty(
    ws: openpyxl.worksheet.worksheet.Worksheet,
    col_widths: t.List[int],
    top: int,
    left: int,
    tty_rows: int = 20,
) -> None:
    """Print the beginning of a worksheet as a text table.

    At most ``tty_rows - 2`` rows and ``len(col_widths)`` columns are
    printed. Columns before ``left`` are separated from the remaining ones
    by a heavy vertical bar, and a heavy horizontal rule is printed
    immediately before row number ``top``. Every cell value (empty string
    for falsy values) is passed through ``to_width`` with its column width.
    """
    rows = ws.iter_rows(max_row=min(tty_rows - 2, ws.max_row), max_col=len(col_widths))
    # The split of the widths does not depend on the row, so compute it once.
    header_widths = col_widths[: left - 1]
    data_widths = col_widths[left - 1 :]

    for row_number, row in enumerate(rows, start=1):
        if row_number == top:
            print(
                "┿".join("━" * width for width in header_widths),
                "┿".join("━" * width for width in data_widths),
                sep="╋",
            )
        header_part = "│".join(
            to_width(cell.value or "", width)
            for cell, width in zip(row, header_widths)
        )
        data_part = "│".join(
            to_width(cell.value or "", width)
            for cell, width in zip(row[left - 1 :], data_widths)
        )
        print(header_part, data_part, sep="┃")
def parse_all_languages(
    self, sheet: openpyxl.worksheet.worksheet.Worksheet
) -> t.Dict[str, str]:
    """Parse all language descriptions in the focal sheet.

    Every column from ``self.left`` on is read down to the row above
    ``self.top``. Columns whose cells are all empty are skipped. For the
    others the first matching database candidate is used; if there is none,
    ``on_language_not_found`` decides whether the language is inserted into
    the database (and then used) or the column is skipped.

    Returns
    =======
    languages: A dictionary mapping the ``column`` attribute of each
        language column's first cell to the language ID
    """
    no_candidate = object()
    languages_by_column: t.Dict[str, str] = {}

    header_columns = sheet.iter_cols(
        min_row=1, max_row=self.top - 1, min_col=self.left
    )
    for lan_col in cli.tq(
        header_columns,
        task="Parse all languages",
        total=sheet.max_column - self.left,
    ):
        c_l_id = self.db.dataset["LanguageTable", "id"].name
        if cells_are_empty(lan_col):
            # Skip empty languages
            continue

        language = self.language_from_column(lan_col)
        candidates = self.db.find_db_candidates(
            language,
            self.check_for_language_match,
        )
        # Only the first candidate, if any, is of interest.
        language_id = next(iter(candidates), no_candidate)
        if language_id is no_candidate:
            if not self.on_language_not_found(language, lan_col[0].coordinate):
                continue
            self.db.insert_into_db(language)
            language_id = language[c_l_id]

        languages_by_column[lan_col[0].column] = language_id

    return languages_by_column
def add_plan_ordering_defaults(
    plan_dict: dict, worksheet: openpyxl.worksheet.worksheet.Worksheet
):
    """Append the plan ordering defaults of a plan to the worksheet.

    Starting at the next empty row of column 1 this writes, in order:

    * one row per plan-level attribute (bold label in column 1, value in
      column 2),
    * one header row with the phase/visit column titles in columns 3-9,
    * one row per phase of ``plan_dict["phases"]`` (sorted with
      ``dict_loop.get_phase_seq``), where ``True`` flags are shown as a
      centered "x", ``False`` flags as an empty string and any other value
      as is.

    Returns the worksheet that was passed in.
    """
    current_row = util.get_next_empty_row_in_col(col=1, worksheet=worksheet)

    labelled_attributes = {
        "Prompt user for plan start date and venue": util.bool_to_str(
            plan_dict.get("plan_ord_def_prompt_user_ind")
        ),
        "Open by Default:": plan_dict.get("plan_ord_def_open_default"),
        "Select default visit type": plan_dict.get("plan_ord_def_default_visit"),
    }
    header_titles = [
        "Primary Phase",
        "Optional Phase",
        "Future Phase",
        "This Visit (OP)",
        "This Visit (IP)",
        "Future Visit (OP)",
        "Future Visit (IP)",
    ]
    # Column in which each phase attribute is shown.
    column_by_key = {
        "description": 2,
        "primary_phase_ind": 3,
        "optional_phase_ind": 4,
        "future_phase_ind": 5,
        "this_visit_outpt": 6,
        "this_visit_inpt": 7,
        "future_visit_outpt": 8,
        "future_visit_inpt": 9,
    }

    # Plan-level attributes: one "label | value" row each.
    for label, value in labelled_attributes.items():
        label_cell = worksheet.cell(row=current_row, column=1)
        label_cell.value = label
        label_cell.font = Font(b=True)
        label_cell.offset(column=1).value = value
        current_row += 1

    # Header row of the phase table; column 2 only gets its border.
    util_borders.set_outside_borders(worksheet.cell(row=current_row, column=2))
    for column, title in enumerate(header_titles, start=3):
        header_cell = worksheet.cell(row=current_row, column=column)
        header_cell.value = title
        header_cell.font = Font(b=True)
        header_cell.alignment = Alignment(
            horizontal="center", vertical="center", wrap_text=True
        )
        util_borders.set_outside_borders(header_cell)
    current_row += 1

    # One row per phase.
    for _, phase in sorted(plan_dict["phases"].items(), key=dict_loop.get_phase_seq):
        for key, value in phase.items():
            if key not in column_by_key:
                continue
            phase_cell = worksheet.cell(row=current_row, column=column_by_key[key])
            if isinstance(value, bool):
                if value:
                    phase_cell.value = "x"
                    phase_cell.alignment = Alignment(horizontal="center")
                else:
                    phase_cell.value = ""
            else:
                phase_cell.value = value
            util_borders.set_outside_borders(phase_cell)
        current_row += 1

    return worksheet
def highlight_row(sheet: openpyxl.worksheet.worksheet.Worksheet, row_num: int, color: int) -> None:
    """Give every cell of row ``row_num`` a solid fill in an indexed color.

    ``color`` is used as the ``indexed`` value of the fill's start and end
    color.
    """
    matching_rows = list(sheet.iter_rows(min_row=row_num, max_row=row_num, min_col=1))
    for cell in matching_rows[0]:
        cell.fill = PatternFill(
            start_color=Color(indexed=color),
            end_color=Color(indexed=color),
            fill_type="solid",
        )
def format_worksheet(
        ws: pyxl.worksheet.worksheet.Worksheet,
        anchors:list = ["B2", "C3", ""],
        keep=True,
        set_header:bool = True,
        set_content:bool = True,
        set_width:bool = True,
        frozen:Union[bool, str, tuple, list] = True
) -> None:
    """
    Description:
        Style the data area of a worksheet: header style, content style,
        frozen panes and column widths.
    Params:
        ws: worksheet to be formatted
        anchors: [CR1, CR2, CRN], which determine the whole df
            CR1 first cell of the header area
            CR2 first cell of the content area
            CRN last cell of the data; "" means the end of `ws.dimensions`
            An empty list means the whole of `ws.dimensions`, without
            any header area. The list passed in is never modified.
        keep: handed through to `apply_style`
        set_header: style the header rows/columns with `HEADER_STYLE`
        set_content: style the content area with `CONTENT_NOFILL`
        set_width: set the column widths from the cell contents
        frozen: `True` freezes at CR2; a string is used as the freeze
            cell as is; a list/tuple is converted with `_con_cell`;
            a falsy value leaves the panes untouched
    Return:
        None
    """
    # Font size assumed for cells whose font carries no size (11 is the
    # default body font size of Excel and openpyxl).
    default_font_size = 11

    # Set sheet's style
    ws.sheet_view.showGridLines=False
    ws.sheet_view.showRowColHeaders=True

    # Copy to avoid affecting the caller's list (or the shared default)
    anchors = anchors.copy()

    # Set data dimensions with default dimensions
    if not anchors:
        cr1, crn = ws.dimensions.split(":")
        anchors = [cr1, cr1, crn]
    if anchors[2] == "":
        anchors[2] = ws.dimensions.split(":")[1]

    # Get data structure in `ws`
    # ach:[
    #   [cr1_c, cr1_r],
    #   [cr2_c, cr2_r],
    #   [crn_c, crn_r],
    # ]
    ach = [0] * 3
    for idx, anchor in enumerate(anchors):
        if isinstance(anchor, str):
            cr_c, cr_r = _split_cell(anchor)
        else:
            # NOTE(review): for a non-string anchor the *whole* anchor is
            # stored as the column part and `_get_col_str(anchor[1])` as
            # the row part. That looks inverted compared to the string
            # branch; kept as is, confirm against the callers.
            cr_c, cr_r = anchor, _get_col_str(anchor[1])
        ach[idx] = [cr_c, cr_r]

    if set_header:
        # Row header exists (CR1 and CR2 are in different columns)
        if ach[0][0] != ach[1][0]:
            apply_style(
                ws,
                _con_cell(ach[0]),
                _around_cell(_con_cell([ach[1][0], ach[2][1]]), (-1, 0)),
                HEADER_STYLE,
                keep
            )
        # Column header exists (CR1 and CR2 are in different rows)
        if ach[0][1] != ach[1][1]:
            apply_style(
                ws,
                _con_cell(ach[0]),
                _around_cell(_con_cell([ach[2][0], ach[1][1]]), (0, -1)),
                HEADER_STYLE,
                keep
            )

    # Set content style
    if set_content:
        apply_style(
            ws,
            _con_cell(ach[1]),
            _con_cell(ach[2]),
            CONTENT_NOFILL,
            keep
        )

    # Set freeze panes
    if frozen:
        # `== True` (not `is True`) is deliberate: it keeps accepting 1.
        if frozen == True:  # noqa: E712
            frozen = anchors[1]
        elif isinstance(frozen, (list, tuple)):
            frozen = _con_cell(frozen)
        ws.freeze_panes = frozen

    # Set columns' width, between 10-100, according to value length.
    # The width of a cell is estimated as `len * font_size // 20`.
    # A cell styled by a tool other than `openpyxl` may come without a
    # usable font (no font object or no size); `default_font_size` is
    # used for such cells instead of failing.
    if set_width:
        start_column_idx = _get_col_num(ach[0][0])
        end_column_idx = _get_col_num(ach[2][0])
        for column_idx, column in enumerate(ws.iter_cols(start_column_idx, end_column_idx), start_column_idx):
            widths = []
            for cell in column:
                font = cell.font
                font_size = font.size if font is not None else None
                if font_size is None:
                    font_size = default_font_size
                widths.append(len(str(cell.value)) * font_size // 20)
            # `default=0` keeps an empty column from raising; the lower
            # bound of 10 applies in that case.
            width = min(max(max(widths, default=0), 10), 100)
            ws.column_dimensions[_get_col_str(column_idx)].width = width + 2
def parse(self, sheet: openpyxl.worksheet.worksheet.Worksheet) -> dict:
    """Read this item's cell from the sheet.

    Returns a one-entry dict that maps the formatted ``self.alias`` to the
    value of the cell at row ``self.row`` and column ``self.col``.
    """
    key = f"{self.alias}"
    value = sheet.cell(row=self.row, column=self.col).value
    return {key: value}
def add_row_2(
    worksheet: openpyxl.worksheet.worksheet.Worksheet,
    # comp_row: int = None,
    component_type: str = None,
    component: str = None,
    no_default_order_sentence: bool = None,
    required: bool = None,
    prechecked: bool = None,
    time_zero_ind: bool = None,
    time_zero_offset: str = None,
    offset: str = None,
    bgcolor_red: int = None,
    bgcolor_blue: int = None,
    bgcolor_green: int = None,
    target_duration: str = None,
    dcp_clin_cat: str = None,
    dcp_clin_sub_cat: str = None,
    allow_proactive_eval: bool = None,
    chemo_ind: bool = None,
    chemo_related_ind: bool = None,
    persistent_note: bool = None,
    linking_anchor_comp_ind: bool = None,
    linking_group_desc: str = None,
    linking_rule: str = None,
    linking_rule_quantity: int = None,
    linking_override_reason: str = None,
    order_sentences: dict = None,
):
    """Write one component, and its order sentences, to the worksheet.

    The component is written to the next empty row of column 1. Only
    attributes that have a column of their own and carry a value other than
    ``None``, ``0``/``"0"`` or ``False`` are written:

    * ``True`` flags are rendered by ``add_ind_to_cell``,
    * the value ``"Note"`` is rendered by ``format_note_cells`` with the
      given background color,
    * any other value is written as is with text wrapping; when
      ``linking_rule_quantity`` is set, the value in column 1 is extended
      by the linked component group details.

    Each order sentence whose id is not 0 is then written (display line,
    order comment and, if present, IV ingredient), one row per sentence,
    starting in the component's row. ``order_sentences`` may be omitted.

    Returns the worksheet that was passed in.
    """
    cell_column = {
        "component_type": 1,
        "component": 2,
        "iv_ingredient": 3,
        "order_sentence_display_line": 4,
        "order_comment": 5,
        "no_default_order_sentence": 6,
        "required": 7,
        "prechecked": 8,
        "time_zero_ind": 9,
        "time_zero_offset": 10,
        "offset": 11,
        "target_duration": 12,
        "allow_proactive_eval": 13,
        "chemo_ind": 14,
        "chemo_related_ind": 15,
        "persistent_note": 16,
    }
    comp_row = util.get_next_empty_row_in_col(col=1, worksheet=worksheet)

    # The arguments that have a column of their own, in signature order.
    # Listed explicitly instead of being collected through locals(), so
    # that adding a local variable can never leak into the sheet.
    component_values = {
        "component_type": component_type,
        "component": component,
        "no_default_order_sentence": no_default_order_sentence,
        "required": required,
        "prechecked": prechecked,
        "time_zero_ind": time_zero_ind,
        "time_zero_offset": time_zero_offset,
        "offset": offset,
        "target_duration": target_duration,
        "allow_proactive_eval": allow_proactive_eval,
        "chemo_ind": chemo_ind,
        "chemo_related_ind": chemo_related_ind,
        "persistent_note": persistent_note,
    }
    # `False == 0`, so unset flags are dropped here together with 0 and "0".
    payload_dict = {
        k: v
        for k, v in component_values.items()
        if v is not None and v not in ["0", 0]
    }

    for k, v in payload_dict.items():
        comp_col = cell_column[k]
        active_cell = worksheet.cell(row=comp_row, column=comp_col)
        if isinstance(v, bool) and v:
            active_cell = add_ind_to_cell(active_cell)
        elif v == "Note":
            active_cell = format_note_cells(
                active_cell=active_cell,
                bgcolor_red=bgcolor_red,
                bgcolor_green=bgcolor_green,
                bgcolor_blue=bgcolor_blue,
            )
        else:
            if linking_rule_quantity and comp_col == 1:
                active_cell.value = (
                    "{} - Linked Component Group - {}, {}, {}, {}".format(
                        v,
                        linking_group_desc,
                        linking_rule,
                        linking_rule_quantity,
                        linking_override_reason,
                    ))
            else:
                active_cell.value = v
            # NOTE(review): wrapping is applied to plain text values only,
            # so that it does not replace the alignment set by the
            # indicator/note helpers - confirm this is the intended scope.
            active_cell.alignment = Alignment(wrap_text=True)

    sentence_column = cell_column.get("order_sentence_display_line")
    iv_component_column = cell_column.get("iv_ingredient")
    order_comment_column = cell_column.get("order_comment")

    # `order_sentences` defaults to None: treat that as "no sentences".
    for os_id, sentence in sorted((order_sentences or {}).items(),
                                  key=dict_loop.get_order_sentence_seq):
        if os_id != 0:
            active_cell = worksheet.cell(row=comp_row, column=sentence_column)
            active_cell.value = sentence.get("order_sentence_display_line")
            active_cell.alignment = Alignment(wrap_text=True)

            active_cell = worksheet.cell(row=comp_row, column=order_comment_column)
            active_cell.value = sentence.get("order_comment")
            active_cell.alignment = Alignment(wrap_text=True)

            if sentence.get("iv_ingredient"):
                active_cell = worksheet.cell(row=comp_row, column=iv_component_column)
                active_cell.value = sentence.get("iv_ingredient")

            # Every written sentence takes a row of its own.
            comp_row += 1

    return worksheet
def _create_xml_individual_files(  # pylint: disable=too-many-branches
    self,
    sheet: openpyxl.worksheet.worksheet.Worksheet,
    sub_series: dict[str, etree._Element],
) -> None:
    """Based on a sheet creates .xml element entries for every file found.

    Every row of the sheet describes one file. A row either gets a new
    entry (created with ``self.file_entry`` under the element of its series
    in ``sub_series``) or, when ``compare_rows`` reports it as similar to
    the previous row, is merged into the previous entry by extending that
    entry's ``unitid`` page range and its ``daoset``.

    Raises:
        ValueError: if a file number contains a space, if a verso row comes
            before any entry was created, or if the previous entry lacks a
            usable ``unitid`` or a ``daoset`` when a row is merged into it.
    """
    # State carried from one processed row to the next: the previous row,
    # the entry element it ended up in, and its series.
    prev_file, prev_file_did, prev_series = None, None, None
    for file in sheet.iter_rows():
        similar = False
        individual_verso = False

        # Skip empty lines or series title lines
        if file[0].value is None or file[0].value.endswith("_title"):
            continue

        # Error on files with a space in their "file number"
        if " " in file[0].value:
            raise ValueError(
                f"There is a space in the file number {file[0].value}. This is not allowed!"
            )

        file_data = parse_file(file)

        # Rows are only compared within one series, and never for files
        # titled "Bianca".
        if (
            prev_file is not None
            and prev_file_did is not None
            and file_data.series == prev_series
            and file_data.title != "Bianca"
        ):
            similar = compare_rows(file, prev_file)

        # If current file is a verso description, remove verso from previous daoset
        # NOTE(review): r".+v" matches every name that has a "v" anywhere
        # after its first character, not only names ending in "v" (which
        # the `[:-1]` in the error message below suggests) - confirm.
        if re.match(r".+v", file_data.file_name):
            if prev_file_did is None:
                raise ValueError(
                    # pylint: disable-next=line-too-long
                    f"{file_data.file_name} is a verso appearing before the description of {file_data.file_name[:-1]}"
                )
            # If the previous file ends in u, the v is not 'verso', but
            # continuation of long document.
            if not prev_file_did.find("unitid").text.endswith("u"):
                individual_verso = True

        # The verso gets described on its own, so its image is dropped from
        # the daoset of the previous entry.
        if individual_verso:
            for dao in prev_file_did.find("daoset"):
                if dao.attrib["id"] == f"{file_data.file_name}.tif":
                    dao.getparent().remove(dao)

        if not similar:
            # Start a new entry; it becomes the merge target for the rows
            # that follow.
            prev_file_did = self.file_entry(
                sub_series[file_data.series], file_data, individual_verso
            )
        else:
            # Update pages/id of previous document
            # If similar means prev_file_did is defined
            prev_file_did = cast(
                etree._Element, prev_file_did  # pylint: disable=protected-access
            )
            unitid = prev_file_did.find("unitid")
            if (
                not isinstance(
                    unitid, etree._Element  # pylint: disable=protected-access
                )
                or not unitid.text
                or not isinstance(unitid.text, str)
            ):
                raise ValueError(
                    f"Can't find unitid in {prev_file_did}, it is empty or it isn't a string"
                )

            # Keep the start of an existing "first-last" range and replace
            # its end by the current page; otherwise start a range.
            if "-" in unitid.text:
                unitid.text = (
                    unitid.text[: unitid.text.index("-") + 1] + file_data.page
                )
            else:
                unitid.text += f"-{file_data.page}"

            # Update daoset of previous document
            daoset = prev_file_did.find("daoset")
            if not isinstance(
                daoset, etree._Element  # pylint: disable=protected-access
            ):
                raise ValueError(f"Can't find daoset in {prev_file_did}")
            add_dao(daoset, file_data, individual_verso)

        prev_file = file
        prev_series = file_data.series
def format_cells(sheet: openpyxl.worksheet.worksheet.Worksheet):
    """Make every cell of the sheet wrap its text and align it to the top."""
    every_cell = (cell for row in sheet.iter_rows() for cell in row)
    for cell in every_cell:
        cell.alignment = Alignment(wrap_text=True, vertical='top')
def import_interleaved(
    ws: openpyxl.worksheet.worksheet.Worksheet,
    logger: logging.Logger = cli.logger,
    ids: t.Optional[t.Set[types.Cognateset_ID]] = None,
) -> t.Iterable[
    t.Tuple[
        types.Form_ID,
        types.Language_ID,
        types.Parameter_ID,
        str,
        None,
        types.Cognateset_ID,
    ]
]:
    """Yield the forms of a worksheet in "interleaved" layout.

    Layout, as read by this function: the first row holds the language
    names (from column 2 on), the first column holds the concepts (from
    row 2 on, in every second row). Below each language name, form cells
    and cognate set cells alternate, so that each concept has a form cell
    and, directly underneath, a cognate set cell per language.

    A form cell may hold several forms separated by commas or semicolons;
    separators inside parentheses do not split. The cognate set cell is
    split on the same separators. Forms and cognate sets are paired in
    order; a form or a cognate set equal to "?" is skipped.

    Yields tuples ``(form id, language name, concept, form, None, cognate
    set)``. Form ids are derived from language name and concept and made
    unique against ``ids`` by appending ``_s2``, ``_s3``, ...; every yielded
    id is added to ``ids`` (a fresh set when ``ids`` is None).
    """
    if ids is None:
        ids = set()

    comma_or_semicolon = re.compile("[,;]\\W*")

    # Concepts: every second cell of the first column, starting in row 2.
    concepts = []
    for concept_metadata in ws.iter_cols(min_col=1, max_col=1, min_row=2):
        for entry, _ in zip(concept_metadata[::2], concept_metadata[1::2]):
            try:
                concepts.append(clean_cell_value(entry))
            except AttributeError:
                break

    for language in cli.tq(
        ws.iter_cols(min_col=2), task="Parsing cells", total=ws.max_column
    ):
        language_name = clean_cell_value(language[0])
        for c, (entry, cogset) in enumerate(zip(language[1::2], language[2::2])):
            if not entry.value:
                if cogset.value:
                    logger.warning(
                        f"Cell {entry.coordinate} was empty, but cognatesets {cogset.value} were given in {cogset.coordinate}."
                    )
                continue

            f = clean_cell_value(entry)
            try:
                len(f)
            except TypeError:
                cli.Exit.INVALID_INPUT(
                    "I expected one or more forms (so, text) in cell {}, but found {}. Do you have more than one header row?".format(
                        entry.coordinate, f
                    )
                )

            # Split the cell on separators that are outside of parentheses.
            # `f` always holds the not yet split rest of the cell and `i`
            # the scan position within it.
            forms = []
            bracket_level = 0
            i = 0
            while i < len(f):
                if f[i] == "(":
                    bracket_level += 1
                elif f[i] == ")":
                    bracket_level -= 1
                elif not bracket_level:
                    match = comma_or_semicolon.match(f[i:])
                    if match:
                        forms.append(f[:i].strip())
                        f = f[i + match.span()[1]:]
                        i = 0
                        continue
                i += 1
            forms.append(f.strip())

            if isinstance(clean_cell_value(cogset), int):
                cogsets = [str(clean_cell_value(cogset))]
            else:
                cogset = clean_cell_value(cogset)
                cogsets = comma_or_semicolon.split(cogset.strip())

            # A single cognate set, or one per form, is what is expected.
            if len(cogsets) != 1 and len(cogsets) != len(forms):
                logger.warning(
                    "{:}: Forms ({:}) did not match cognates ({:})".format(
                        entry.coordinate, ", ".join(forms), ", ".join(cogsets)
                    )
                )

            # The appended None lets one more form than there are cognate
            # sets through, without a cognate set; zip drops any further
            # forms.
            for form, cogset_id in zip(forms, cogsets + [None]):
                if form == "?" or cogset_id == "?":
                    continue
                base_id = util.string_to_id(f"{language_name}_{concepts[c]}")

                form_id = base_id
                synonym = 1
                while form_id in ids:
                    synonym += 1
                    form_id = f"{base_id}_s{synonym:d}"
                yield (form_id, language_name, concepts[c], form, None, cogset_id)
                ids.add(form_id)
def parse_cells(
    self,
    sheet: openpyxl.worksheet.worksheet.Worksheet,
    status_update: t.Optional[str] = None,
) -> None:
    """Parse all data cells of the sheet and commit the result.

    The language columns are parsed first. Then, for every row from
    ``self.top`` on, the cells left of ``self.left`` are turned into a row
    object (found in, or inserted into, the database) and every other cell
    that belongs to a known language column is parsed into forms, each of
    which is passed to ``self.handle_form``. The database is committed once
    all rows have been processed.

    Raises:
        AssertionError: if the first row that contains forms has no usable
            row header.
        NotImplementedError: if the row objects are rows of the FormTable.
    """
    languages = self.parse_all_languages(sheet)
    row_object: t.Optional[R] = None
    for row in cli.tq(
        sheet.iter_rows(min_row=self.top),
        task="Parsing cells",
        total=sheet.max_row - self.top,
    ):
        row_header, row_forms = row[: self.left - 1], row[self.left - 1 :]
        # Parse the row header, creating or retrieving the associated row
        # object (i.e. a concept or a cognateset)
        properties = self.properties_from_row(row_header)
        if properties:
            c_r_id = self.db.dataset[properties.__table__, "id"].name
            try:
                c_r_name = self.db.dataset[properties.__table__, "name"].name
            except KeyError:
                c_r_name = None
            similar = self.db.find_db_candidates(
                properties, self.check_for_row_match
            )
            for row_id in similar:
                # Re-use the first matching row of the database.
                properties[c_r_id] = row_id
                break
            else:
                if not self.on_row_not_found(
                    properties, cell_identifier=row[0].coordinate
                ):
                    # Neither found nor to be created: skip the whole row.
                    continue
                if c_r_id not in properties:
                    properties[c_r_id] = string_to_id(
                        str(properties.get(c_r_name, ""))
                    )
                self.db.make_id_unique(properties)
                self.db.insert_into_db(properties)
            # check the fields of properties are not empty, if so, set row
            # object to properties. This means that if there is no
            # properties object, or if it is empty, the previous row object
            # is re-used. This is intentional.
            if any(properties.values()):
                row_object = properties

        if row_object is None:
            if any(c.value for c in row_forms):
                raise AssertionError(
                    "Your first data row didn't have a name. "
                    "Please check your format specification or ensure the first row has a name."
                )
            continue

        # Parse the row, cell by cell
        for cell_with_forms in row_forms:
            try:
                this_lan = languages[cell_with_forms.column]
            except KeyError:
                # Not a language column.
                continue

            # Parse the cell, which results (potentially) in multiple forms
            if row_object.__table__ == "FormTable":
                raise NotImplementedError(
                    "TODO: I am confused why what I'm doing right now ever landed on my agenda, but you seem to have gotten me to attempt it. Please contact the developers and tell them what you did, so they can implement the thing you tried to do properly!"
                )
            for params in self.cell_parser.parse(
                cell_with_forms,
                this_lan,
                f"{sheet.title}.{cell_with_forms.coordinate}",
            ):
                self.handle_form(
                    params,
                    row_object,
                    cell_with_forms,
                    this_lan,
                    status_update,
                )
    self.db.commit()
def set_horiz_vert_center_across_columns(
        worksheet: openpyxl.worksheet.worksheet.Worksheet,
        min_col:int,
        max_col: int):
    """Center the cells of a column range, from row 4 downwards.

    Every cell in columns ``min_col`` to ``max_col`` and in row 4 or below
    gets a horizontally and vertically centered alignment. Returns the
    worksheet that was passed in.
    """
    columns = worksheet.iter_cols(min_col=min_col, max_col=max_col, min_row=4)
    for cell in (c for column in columns for c in column):
        cell.alignment = Alignment(horizontal='center', vertical='center')
    return worksheet
def set_word_wrap_across_columns(
        worksheet: openpyxl.worksheet.worksheet.Worksheet,
        min_col:int,
        max_col: int):
    """Turn on text wrapping for all cells of a column range.

    Every cell in columns ``min_col`` to ``max_col`` gets a new alignment
    with ``wrapText`` enabled. Returns the worksheet that was passed in.
    """
    columns = worksheet.iter_cols(min_col=min_col, max_col=max_col)
    for cell in (c for column in columns for c in column):
        cell.alignment = Alignment(wrapText=True)
    return worksheet
def get_headers_from_excel(
    sheet: openpyxl.worksheet.worksheet.Worksheet,
) -> t.Iterable[str]:
    """Return the headers of the sheet.

    The cells of the first row are handed to ``normalize_header``, whose
    result is returned.
    """
    first_row = next(sheet.iter_rows(1, 1))
    return normalize_header(cell for cell in first_row)
def setup_headers(sheet: openpyxl.worksheet.worksheet.Worksheet) -> None:
    """Write the export headers into row 1 and make that row bold Calibri.

    The headers and their columns are taken from
    ``create_srg_export.COLUMN_MAPPINGS`` (column -> header text).
    """
    for column, header in create_srg_export.COLUMN_MAPPINGS.items():
        sheet[f'{column}1'] = header

    header_row = next(iter(sheet.iter_rows(max_row=1)))
    for cell in header_row:
        cell.font = Font(bold=True, name='Calibri')