Пример #1
0
def _write_dynamic_grid_conditional_formatting(val_sheet):
    """

    :type val_sheet: xlsx_static_grid_builder.ValidationWorksheet
    """
    # Light red fill with dark red text
    red_format = xlsxbasics.make_format(val_sheet.workbook, {
        'bg_color': '#FFC7CE',
        'font_color': '#9C0006',
        'underline': 1
    })

    # Green fill with (same) green text
    green_format = xlsxbasics.make_format(val_sheet.workbook, {
        'bg_color': '#C6EFCE',
        'font_color': '#C6EFCE'
    })

    # NB: grid goes as far as last allowable row for samples, not just to number of expected samples, in case user
    # adds some extra ones :)
    dynamic_grid_range = xlsxbasics.format_range(
        val_sheet.first_data_col_index, val_sheet.first_data_row_index,
        val_sheet.last_data_col_index,
        val_sheet.last_allowable_row_for_sample_index)

    val_sheet.worksheet.conditional_format(dynamic_grid_range, {
        'type': 'cell',
        'criteria': '==',
        'value': "\"\"",
        'format': green_format
    })

    val_sheet.worksheet.conditional_format(dynamic_grid_range, {
        'type': 'cell',
        'criteria': '==',
        'value': "\"Fix\"",
        'format': red_format
    })
Пример #2
0
def write_dynamic_validation_grid(val_sheet,
                                  index_and_range_str_tuple_by_header_dict):
    """

    :type val_sheet: xlsx_static_grid_builder.ValidationWorksheet
    """

    _write_dynamic_name_link_col(val_sheet,
                                 index_and_range_str_tuple_by_header_dict)

    # apparently can't add alignment to a conditional format :(
    centered_format = xlsxbasics.make_format(val_sheet.workbook,
                                             {'align': 'center'})

    # at outer level, move across columns
    for curr_col_index in range(val_sheet.first_data_col_index,
                                val_sheet.last_data_col_index + 1):
        val_sheet.worksheet.set_column(curr_col_index, curr_col_index, None,
                                       centered_format)

        curr_static_grid_col_index = val_sheet.first_static_grid_col_index + curr_col_index - 1
        col_rank = _format_dynamic_rank_formula_str(
            val_sheet,
            curr_static_grid_col_index,
            index_and_range_str_tuple_by_header_dict,
            for_row=False)

        col_already_valid_condition = _format_range_already_valid_formula_str(
            val_sheet,
            col_rank,
            index_and_range_str_tuple_by_header_dict,
            for_row=False)

        _write_dynamic_header_cell(val_sheet, curr_col_index, col_rank)

        # at inner level, move down rows
        for curr_row_index in range(val_sheet.first_data_row_index,
                                    val_sheet.last_data_row_index + 1):
            cell_formula = _generate_dynamic_grid_cell_formula_str(
                val_sheet, col_rank, col_already_valid_condition,
                curr_row_index, index_and_range_str_tuple_by_header_dict)
            curr_cell = xlsxbasics.format_range(curr_col_index, curr_row_index)
            val_sheet.worksheet.write_formula(curr_cell, cell_formula)

    _write_dynamic_grid_conditional_formatting(val_sheet)
Пример #3
0
def write_workbook(study_name, schema_dict, form_dict, metadata_wizard_settings):
    num_allowable_samples = 1000
    # TODO: someday: either expand code to use num_samples and add real code to get in from interface, or take out unused hook
    num_samples = 0
    num_columns = len(schema_dict.keys())
    a_regex_handler = metadata_wizard_settings.regex_handler

    # create workbook
    file_base_name = slugify(study_name)
    file_name = '{0}_{1}.xlsx'.format(file_base_name, randrange(1000, 9999))
    output_path = metadata_wizard_settings.get_output_path(file_name)
    workbook = xlsxwriter.Workbook(output_path, {'strings_to_numbers': False,
                                               'strings_to_formulas': True,
                                               'strings_to_urls': True})

    # write metadata worksheet
    phi_renamed_schema_dict = qiimp.schema_builder.rewrite_field_names_with_phi_if_relevant(schema_dict)
    metadata_worksheet = xlsxbasics.MetadataWorksheet(workbook, num_columns, num_samples, a_regex_handler,
                                                      num_allowable_samples=num_allowable_samples)
    qiimp.xlsx_metadata_grid_builder.write_metadata_grid(metadata_worksheet, phi_renamed_schema_dict,
                                                                   DescriptionWorksheet.get_sheet_name())

    # write validation worksheet
    validation_worksheet = qiimp.xlsx_static_grid_builder.ValidationWorksheet(workbook, num_columns,
                                                                                        num_samples, a_regex_handler)
    index_and_range_str_tuple_by_header_dict = qiimp.xlsx_static_grid_builder.write_static_validation_grid_and_helpers(
        validation_worksheet, phi_renamed_schema_dict)
    qiimp.xlsx_dynamic_grid_builder.write_dynamic_validation_grid(
        validation_worksheet, index_and_range_str_tuple_by_header_dict)

    # write descriptions worksheet
    descriptions_worksheet = DescriptionWorksheet(workbook, num_columns, num_samples, a_regex_handler)
    xlsxbasics.write_header(descriptions_worksheet, "field name", 0)
    xlsxbasics.write_header(descriptions_worksheet, "field description", 1)
    sorted_keys = xlsxbasics.sort_keys(phi_renamed_schema_dict)
    for field_index, field_name in enumerate(sorted_keys):
        row_num = field_index + 1 + 1  # plus 1 to move past name row, and plus 1 again because row nums are 1-based
        field_specs_dict = phi_renamed_schema_dict[field_name]
        message = qiimp.xlsx_validation_builder.get_field_constraint_description(field_specs_dict, a_regex_handler)
        descriptions_worksheet.worksheet.write("A{0}".format(row_num), field_name, metadata_worksheet.header_format)
        descriptions_worksheet.worksheet.write("B{0}".format(row_num), message)

    # write schema worksheet--note, don't use the phi_renamed_schema_dict but the original schema_dict
    schema_worksheet = xlsxbasics.create_worksheet(workbook, xlsxbasics.SheetNames.schema.value)
    schema_worksheet.write_string("A1", yaml.dump(schema_dict, default_flow_style=False))
    schema_worksheet.hide()

    # write form worksheet
    form_worksheet = xlsxbasics.create_worksheet(workbook, xlsxbasics.SheetNames.form.value)
    form_worksheet.write_string("A1", yaml.dump(form_dict, default_flow_style=False))
    form_worksheet.hide()

    # write readme worksheet
    readme_format = workbook.add_format({'align': 'left', 'valign': 'top'})
    readme_format.set_text_wrap()
    readme_worksheet = xlsxbasics.create_worksheet(workbook, xlsxbasics.SheetNames.readme.value)
    readme_worksheet.set_column(0, 0, 100)  # Width of column A set to 100.
    tutorial_hyperlink_str = "=HYPERLINK(\"{0}\", \"Click here for instructions on using this spreadsheet.\")".format(
                                       metadata_wizard_settings.TUTORIAL_LINK)

    readme_worksheet.write_formula(0,0, tutorial_hyperlink_str,
                                   # TODO: someday: centralize link format definitions.
                                   # this link format is a copy-paste of the one in
                                   # xlsx_dynamic_grid_builder._write_dynamic_name_link_col ; assuming we want
                                   # all the link formats in the spreadsheet to look the same, they should be defined
                                   # in one central place.
                                   xlsxbasics.make_format(workbook, {'font_color': 'blue', 'underline': 1}))
    readme_worksheet.write_string('A3', metadata_wizard_settings.make_readme_text(), readme_format)

    # close workbook
    workbook.close()
    return file_name
Пример #4
0
def _write_dynamic_name_link_col(val_sheet,
                                 index_and_range_str_tuple_by_header_dict):
    """

    :type val_sheet: xlsx_static_grid_builder.ValidationWorksheet
    """

    # e.g., =IF(B2=" "," ",HYPERLINK(CONCATENATE("#metadata!", ADDRESS(INDEX($AL$2:$AL$11,MATCH(ROWS($AJ$2:AJ2),
    # $AJ$2:$AJ$11,0),0),$AQ$14)),INDEX($AO$2:$AO$11,MATCH(ROWS($AJ$2:AJ2),$AJ$2:$AJ$11,0),0)))

    # Create the standard blue, underlined url link format.
    url_format = xlsxbasics.make_format(val_sheet.workbook, {
        'font_color': 'blue',
        'underline': 1
    })

    xlsxbasics.write_header(val_sheet, val_sheet.SAMPLE_NAME_HEADER,
                            val_sheet.name_link_col_index)

    # NB: grid goes as far as last allowable row for samples, not just to number of expected samples, in case user
    # adds some extra ones :)
    for curr_row_index in range(
            val_sheet.first_data_row_index,
            val_sheet.last_allowable_row_for_sample_index + 1):
        row_rank_num = _format_dynamic_rank_formula_str(
            val_sheet,
            curr_row_index,
            index_and_range_str_tuple_by_header_dict,
            for_row=True)

        row_in_metadata_fixed_range_str = index_and_range_str_tuple_by_header_dict[
            val_sheet.ROW_IN_METADATA_HEADER][1]
        metadata_row_index_str = "INDEX({row_in_metadata_fixed_range_str},{row_rank_num},0)".format(
            row_in_metadata_fixed_range_str=row_in_metadata_fixed_range_str,
            row_rank_num=row_rank_num)

        link_address = "CONCATENATE(\"#metadata!\",ADDRESS({metadata_row_index},{metadata_name_col_index}))".format(
            metadata_row_index=metadata_row_index_str,
            metadata_name_col_index=val_sheet.name_col_index)

        helper_name_fixed_range_str = index_and_range_str_tuple_by_header_dict[
            val_sheet.SAMPLE_NAME_HEADER][1]
        # this index formula will get the value of the name for this sample from the helper col next to the static grid
        helper_name_val = "INDEX({conditional_name_fixed_range_str},{row_num},0)".format(
            conditional_name_fixed_range_str=helper_name_fixed_range_str,
            row_num=row_rank_num)

        curr_cell = xlsxbasics.format_range(val_sheet.name_link_col_index,
                                            curr_row_index)

        # If this sample is entirely valid as shown by the fact that the first data cell in the dynamic grid for this
        # row is just an empty string, write a space into the dynamic name cell.  Otherwise, write a link to the
        # name column for this sample in the metadata sheet.  NB: it does NOT work to look at
        # the value in the is_valid helper column for this sample (either True or False) because the samples
        # change order based on validation status ...
        first_data_cell_in_first_data_col = xlsxbasics.format_range(
            val_sheet.name_link_col_index + 1, curr_row_index)
        full_formula = "=IF({first_validation_cell}=\" \",\" \",HYPERLINK({link_address},{helper_name_val}))".format(
            first_validation_cell=first_data_cell_in_first_data_col,
            link_address=link_address,
            helper_name_val=helper_name_val)
        val_sheet.worksheet.write_formula(curr_cell, full_formula, url_format)
Пример #5
0
def write_metadata_grid(data_worksheet, schema_dict, field_descs_sheet_name):
    """

    :type data_worksheet: xlsxbasics.MetadataWorksheet
    """

    _write_sample_id_col(data_worksheet)

    unlocked = xlsxbasics.make_format(data_worksheet.workbook, is_locked=False)
    # format as text to prevent autoformatting!
    unlocked_text = xlsxbasics.make_format(data_worksheet.workbook,
                                           {'num_format': '@'},
                                           is_locked=False)

    sorted_keys = xlsxbasics.sort_keys(schema_dict)
    for field_index, field_name in enumerate(sorted_keys):
        field_specs_dict = schema_dict[field_name]
        curr_col_index = field_index + 1  # add one bc sample id is in first col

        xlsxbasics.write_header(data_worksheet, field_name, field_index + 1)
        curr_format = unlocked_text if _determine_if_format_should_be_text(
            field_specs_dict) else unlocked
        data_worksheet.worksheet.set_column(curr_col_index, curr_col_index,
                                            None, curr_format)

        col_range = xlsxbasics.format_range(curr_col_index, None)
        starting_cell_name = xlsxbasics.format_range(
            curr_col_index, data_worksheet.first_data_row_index)
        whole_col_range = xlsxbasics.format_range(
            curr_col_index,
            data_worksheet.first_data_row_index,
            last_row_index=data_worksheet.last_allowable_row_for_sample_index)

        validation_dict = _get_validation_dict(field_name, field_specs_dict,
                                               data_worksheet.regex_handler,
                                               field_descs_sheet_name)
        value_key = "value"
        if validation_dict is not None:
            if value_key in validation_dict:
                unformatted_validation_formula = validation_dict[value_key]
                formatted_validation_formula = unformatted_validation_formula.format(
                    cell=starting_cell_name, col_range=col_range)
                validation_dict[value_key] = formatted_validation_formula

            validation_return_code = data_worksheet.worksheet.data_validation(
                whole_col_range, validation_dict)
            # NB: xlsxwriter's data_validation docstring *claims* it returns 0 if it succeeds, but in fact if it
            # succeeds it doesn't return an error code at all, hence the then None check ...
            if validation_return_code is not None and validation_return_code < 0:
                raise ValueError(
                    "Worksheet validation failed with return code '{0}'; check user warnings."
                    .format(validation_return_code))

        _add_default_if_any(data_worksheet, field_specs_dict, curr_col_index)

        max_samples_msg = "No more than {0} samples can be entered in this worksheet.  If you need to submit metadata" \
                          " for >{0} samples, please contact CMI directly.".format(data_worksheet.num_allowable_samples)
        xlsxbasics.write_header(
            data_worksheet, max_samples_msg,
            data_worksheet.first_data_col_index,
            data_worksheet.last_allowable_row_for_sample_index + 1)
def write_metadata_grid(data_worksheet, schema_dict, field_descs_sheet_name):
    """

    :type data_worksheet: xlsxbasics.MetadataWorksheet
    """

    _write_sample_id_col(data_worksheet)

    unlocked = xlsxbasics.make_format(data_worksheet.workbook, is_locked=False)
    # format as text to prevent autoformatting!
    unlocked_text = xlsxbasics.make_format(data_worksheet.workbook,
                                           {'num_format': '@'},
                                           is_locked=False)

    sorted_keys = xlsxbasics.sort_keys(schema_dict)
    for field_index, field_name in enumerate(sorted_keys):
        field_specs_dict = schema_dict[field_name]
        curr_col_index = field_index + 1  # add one bc sample id is in first col

        xlsxbasics.write_header(data_worksheet, field_name, field_index + 1)
        curr_format = unlocked_text if _determine_if_format_should_be_text(
            field_specs_dict) else unlocked
        # Note: although the xlsxwriter docs say
        # "If you wish to set the format without changing the width you can
        # pass None as the width parameter" (https://xlsxwriter.readthedocs.io/
        # worksheet.html), it appears that if I call set_column with None for
        # the width *after* calling write_header (which sets the width
        # explicitly), the width of the column is reset to the default, thus
        # requiring me to explicitly set the min column width here too :(
        data_worksheet.worksheet.set_column(curr_col_index, curr_col_index,
                                            xlsxbasics.get_min_col_width(),
                                            curr_format)

        col_range = xlsxbasics.format_range(curr_col_index, None)
        starting_cell_name = xlsxbasics.format_range(
            curr_col_index, data_worksheet.first_data_row_index)
        whole_col_range = xlsxbasics.format_range(
            curr_col_index,
            data_worksheet.first_data_row_index,
            last_row_index=data_worksheet.last_allowable_row_for_sample_index)

        validation_dict = _get_validation_dict(field_name, field_specs_dict,
                                               data_worksheet.regex_handler,
                                               field_descs_sheet_name)
        value_key = "value"
        if validation_dict is not None:
            if value_key in validation_dict:
                unformatted_validation_formula = validation_dict[value_key]
                formatted_validation_formula = unformatted_validation_formula.format(
                    cell=starting_cell_name, col_range=col_range)
                validation_dict[value_key] = formatted_validation_formula

            validation_return_code = data_worksheet.worksheet.data_validation(
                whole_col_range, validation_dict)
            # NB: xlsxwriter's data_validation docstring *claims* it returns 0 if it succeeds, but in fact if it
            # succeeds it doesn't return an error code at all, hence the then None check ...
            if validation_return_code is not None and validation_return_code < 0:
                raise ValueError(
                    "Worksheet validation failed with return code '{0}'; check user warnings."
                    .format(validation_return_code))

        _add_default_if_any(data_worksheet, field_specs_dict, curr_col_index)

        max_samples_msg = "No more than {0} samples can be entered in this worksheet.  If you need to submit metadata" \
                          " for >{0} samples, please contact CMI directly.".format(data_worksheet.num_allowable_samples)
        xlsxbasics.write_header(
            data_worksheet,
            max_samples_msg,
            data_worksheet.first_data_col_index,
            data_worksheet.last_allowable_row_for_sample_index + 1,
            set_width=False)