def render_common(summary):
    """Build the template variables shared by the variable renderers.

    Reads the ``n_extreme_obs`` and ``n_freq_table_max`` settings from the
    module-level ``config``.

    Args:
        summary: mapping read for ``"value_counts_without_nan"`` and ``"n"``.

    Returns:
        A dict with the keys ``"freq_table_rows"``, ``"firstn_expanded"``
        and ``"lastn_expanded"``.
    """
    extreme_count = config["n_extreme_obs"].get(int)
    max_rows = config["n_freq_table_max"].get(int)

    counts = summary["value_counts_without_nan"]
    total = summary["n"]

    # TODO: with nan
    frequency_rows = freq_table(
        freqtable=counts,
        n=total,
        max_number_to_print=max_rows,
    )
    first_rows = extreme_obs_table(
        freqtable=counts,
        number_to_print=extreme_count,
        n=total,
        ascending=True,
    )
    last_rows = extreme_obs_table(
        freqtable=counts,
        number_to_print=extreme_count,
        n=total,
        ascending=False,
    )

    return {
        "freq_table_rows": frequency_rows,
        "firstn_expanded": first_rows,
        "lastn_expanded": last_rows,
    }
def render_common(config: Settings, summary: dict) -> dict:
    """Build the template variables shared by the variable renderers.

    Args:
        config: settings object; ``n_extreme_obs`` and ``n_freq_table_max``
            are read from it.
        summary: mapping read for ``"value_counts_without_nan"``,
            ``"value_counts_index_sorted"`` and ``"n"``.

    Returns:
        A dict with the keys ``"freq_table_rows"``, ``"firstn_expanded"``
        and ``"lastn_expanded"``.
    """
    extreme_count = config.n_extreme_obs
    max_rows = config.n_freq_table_max

    # TODO: with nan
    frequency_rows = freq_table(
        freqtable=summary["value_counts_without_nan"],
        n=summary["n"],
        max_number_to_print=max_rows,
    )

    index_sorted = summary["value_counts_index_sorted"]
    total = summary["n"]

    first_rows = extreme_obs_table(
        freqtable=index_sorted,
        number_to_print=extreme_count,
        n=total,
    )
    # The last observations are taken from the same counts in reverse order.
    last_rows = extreme_obs_table(
        freqtable=index_sorted[::-1],
        number_to_print=extreme_count,
        n=total,
    )

    return {
        "freq_table_rows": frequency_rows,
        "firstn_expanded": first_rows,
        "lastn_expanded": last_rows,
    }
def render_path_image(summary):
    """Extend the path rendering with image-specific frequency tables and a scatter plot.

    Starts from ``render_path(summary)``, relabels the variable type as
    "Image Path" and appends an "Exif keys" table and an "Image shape"
    tab group to the bottom section.

    Args:
        summary: mapping read for ``"image_shape_counts"``,
            ``"exif_keys_counts"``, ``"scatter_data"``, ``"n"`` and
            ``"varid"``.

    Returns:
        The template variables dict produced by ``render_path``, extended
        in place.
    """
    max_rows = config["n_freq_table_max"].get(int)
    image_format = config["plot"]["image_format"].get(str)

    template_variables = render_path(summary)

    # Top
    overview = template_variables["top"].content["items"][0]
    overview.content["var_type"] = "Image Path"

    # Bottom
    for key in ("image_shape", "exif_keys"):
        template_variables[f"freqtable_{key}"] = freq_table(
            freqtable=summary[f"{key}_counts"],
            n=summary["n"],
            max_number_to_print=max_rows,
        )

    varid = summary["varid"]
    bottom_items = template_variables["bottom"].content["items"]

    # TODO: add dropdown to switch to specific values
    bottom_items.append(
        FrequencyTable(
            template_variables["freqtable_exif_keys"],
            name="Exif keys",
            anchor_id=f"{varid}exif_frequency",
        )
    )

    shape_frequency = FrequencyTable(
        template_variables["freqtable_image_shape"],
        name="Frequency",
        anchor_id=f"{varid}image_shape_frequency",
    )
    shape_scatter = Image(
        scatter_series(summary["scatter_data"]),
        image_format=image_format,
        alt="Scatterplot of image sizes",
        caption="Scatterplot of image sizes",
        name="Scatter",
        anchor_id=f"{varid}scatter",
    )
    bottom_items.append(
        Sequence(
            [shape_frequency, shape_scatter],
            sequence_type="tabs",
            name="Image shape",
            anchor_id=f"{varid}image_shape",
        )
    )

    return template_variables
def render_file(summary):
    """Extend the path rendering with a "File" tab group.

    Starts from ``render_path(summary)``, relabels the variable type as
    "File" and appends a tabbed container holding a file-size histogram
    (when ``"file_size"`` is in ``summary``) and one frequency table per
    available timestamp field.

    Args:
        summary: mapping with the statistics of the variable.

    Returns:
        The template variables dict produced by ``render_path``, extended
        in place.
    """
    varid = summary["varid"]
    template_variables = render_path(summary)

    # Top
    overview = template_variables["top"].content["items"][0]
    overview.content["var_type"] = "File"

    max_rows = config["n_freq_table_max"].get(int)
    image_format = config["plot"]["image_format"].get(str)

    tabs = []

    if "file_size" in summary:
        size_histogram = Image(
            histogram(*summary["histogram_file_size"]),
            image_format=image_format,
            alt="Size",
            caption=f"<strong>Histogram with fixed size bins of file sizes (in bytes)</strong> (bins={len(summary['histogram_file_size'][1]) - 1})",
            name="File size",
            anchor_id=f"{varid}file_size_histogram",
        )
        tabs.append(size_histogram)

    timestamp_fields = (
        ("file_created_time", "Created"),
        ("file_accessed_time", "Accessed"),
        ("file_modified_time", "Modified"),
    )
    for field, title in timestamp_fields:
        if field not in summary:
            continue
        rows = freq_table(
            freqtable=summary[field].value_counts(),
            n=summary["n"],
            max_number_to_print=max_rows,
        )
        tabs.append(
            FrequencyTable(
                rows,
                name=title,
                anchor_id=f"{varid}{field}",
                redact=False,
            )
        )

    template_variables["bottom"].content["items"].append(
        Container(
            tabs,
            name="File",
            sequence_type="tabs",
            anchor_id=f"{varid}file",
        )
    )

    return template_variables
def render_categorical(summary):
    """Assemble the "top" and "bottom" sections for a categorical variable.

    Starts from ``render_common(summary)``. The top section is a grid of an
    overview, a statistics table and a small frequency table. The bottom
    section is a tab sequence with the common values and, when the
    ``vars.cat.check_composition`` setting is enabled, composition/length
    tables and a length histogram.

    Args:
        summary: mapping with the statistics of the variable.

    Returns:
        The template variables dict, extended with ``"top"`` and
        ``"bottom"``.
    """
    varid = summary["varid"]
    max_mini_rows = config["vars"]["cat"]["n_obs"].get(int)

    template_variables = render_common(summary)

    # TODO: merge with boolean
    mini_freq_table_rows = freq_table(
        freqtable=summary["value_counts"],
        n=summary["count"],
        max_number_to_print=max_mini_rows,
    )

    # Top
    info = Overview(varid, summary["varname"], "Categorical", summary["warnings"])

    warn_fields = summary["warn_fields"]
    flagged_stats = (
        ("Distinct count", "n_unique", "fmt"),
        ("Unique (%)", "p_unique", "fmt_percent"),
        ("Missing", "n_missing", "fmt"),
        ("Missing (%)", "p_missing", "fmt_percent"),
    )
    stat_rows = [
        {
            "name": label,
            "value": summary[key],
            "fmt": formatter,
            "class": "alert" if key in warn_fields else "",
        }
        for label, key, formatter in flagged_stats
    ]
    # The memory row never carries an alert class.
    stat_rows.append(
        {
            "name": "Memory size",
            "value": summary["memory_size"],
            "fmt": "fmt_bytesize",
        }
    )
    table = Table(stat_rows)

    fqm = FrequencyTableSmall(mini_freq_table_rows)

    # TODO: settings 3,3,6
    template_variables["top"] = Sequence([info, table, fqm], sequence_type="grid")

    # Bottom
    tabs = [
        FrequencyTable(
            template_variables["freq_table_rows"],
            name="Common Values",
            anchor_id=f"{varid}common_values",
        )
    ]

    if config["vars"]["cat"]["check_composition"].get(bool):
        composition_stats = summary["composition"]
        composition_table = Table(
            [
                {"name": label, "value": composition_stats[key], "fmt": "fmt"}
                for label, key in (
                    ("Contains chars", "chars"),
                    ("Contains digits", "digits"),
                    ("Contains whitespace", "spaces"),
                    ("Contains non-words", "non-words"),
                )
            ],
            name="Composition",
            anchor_id=f"{varid}composition",
        )

        length_table = Table(
            [
                {"name": label, "value": summary[key], "fmt": "fmt_numeric"}
                for label, key in (
                    ("Max length", "max_length"),
                    ("Mean length", "mean_length"),
                    ("Min length", "min_length"),
                )
            ],
            name="Length",
            anchor_id=f"{varid}lengthstats",
        )

        tabs.append(
            Sequence(
                [composition_table, length_table],
                anchor_id=f"{varid}tbl",
                name="Composition",
                sequence_type="grid",
            )
        )

        histogram_bins = 10
        tabs.append(
            Image(
                histogram(summary["length"], summary, histogram_bins),
                alt="Scatter",
                name="Length",
                anchor_id=f"{varid}length",
            )
        )

    template_variables["bottom"] = Sequence(
        tabs,
        sequence_type="tabs",
        anchor_id=f"{varid}bottom",
    )

    return template_variables
def render_path(summary):
    """Extend the categorical rendering with per-path-part frequency tables.

    Starts from ``render_categorical(summary)``, relabels the variable type
    as "Path" and appends frequency tables for the full path, stem, name,
    suffix and parent to the bottom section. A file-size histogram is
    appended when ``"file_sizes"`` is in ``summary``.

    Args:
        summary: mapping with the statistics of the variable.

    Returns:
        The template variables dict produced by ``render_categorical``,
        extended in place.
    """
    varid = summary["varid"]
    max_rows = config["n_freq_table_max"].get(int)
    image_format = config["plot"]["image_format"].get(str)

    template_variables = render_categorical(summary)

    for part in ("name", "parent", "suffix", "stem"):
        template_variables[f"freqtable_{part}"] = freq_table(
            freqtable=summary[f"{part}_counts"],
            n=summary["n"],
            max_number_to_print=max_rows,
        )

    # Top
    overview = template_variables["top"].content["items"][0]
    overview.content["var_type"] = "Path"

    # TODO: colspan=2 -- add a 'Common prefix' row (summary['common_prefix'])
    # to the top table, rendered in a non-wrapping, ellipsised cell.

    # Bottom
    tab_specs = (
        ("freq_table_rows", "Full", "full"),
        ("freqtable_stem", "Stem", "stem"),
        ("freqtable_name", "Name", "name"),
        ("freqtable_suffix", "Suffix", "suffix"),
        ("freqtable_parent", "Parent", "parent"),
    )
    # Build every table before touching the bottom section.
    path_tables = [
        FrequencyTable(
            template_variables[rows_key],
            name=title,
            anchor_id=f"{varid}{slug}_frequency",
        )
        for rows_key, title, slug in tab_specs
    ]

    bottom_items = template_variables["bottom"].content["items"]
    bottom_items.extend(path_tables)

    if "file_sizes" in summary:
        size_histogram = Image(
            histogram(summary["file_sizes"], summary, summary["histogram_bins"]),
            image_format=image_format,
            alt="File size",
            caption=f"<strong>Histogram with fixed size bins of file sizes (in bytes)</strong> (bins={summary['histogram_bins']})",
            name="File size",
            anchor_id=f"{varid}file_size_histogram",
        )
        # TODO: in SequeencyItem
        bottom_items.append(size_histogram)

    return template_variables
def render_categorical(summary):
    """Assemble the "top" and "bottom" sections for a categorical variable.

    Starts from ``render_common(summary)``. The top section is a grid of an
    overview, a statistics table and a small frequency table. The bottom
    section is a tab sequence with the common values and, when the
    ``vars.cat.check_composition`` setting is enabled, a length grid and a
    "Characters" tab group (categories, scripts, blocks).

    Args:
        summary: mapping with the statistics of the variable.

    Returns:
        The template variables dict, extended with ``"top"`` and
        ``"bottom"``.
    """
    varid = summary["varid"]
    max_mini_rows = config["vars"]["cat"]["n_obs"].get(int)
    image_format = config["plot"]["image_format"].get(str)

    template_variables = render_common(summary)

    # TODO: merge with boolean
    mini_freq_table_rows = freq_table(
        freqtable=summary["value_counts"],
        n=summary["count"],
        max_number_to_print=max_mini_rows,
    )

    # Top
    info = Overview(varid, summary["varname"], "Categorical", summary["warnings"])

    warn_fields = summary["warn_fields"]
    flagged_stats = (
        ("Distinct count", "n_unique", "fmt"),
        ("Unique (%)", "p_unique", "fmt_percent"),
        ("Missing", "n_missing", "fmt"),
        ("Missing (%)", "p_missing", "fmt_percent"),
    )
    stat_rows = [
        {
            "name": label,
            "value": summary[key],
            "fmt": formatter,
            "class": "alert" if key in warn_fields else "",
        }
        for label, key, formatter in flagged_stats
    ]
    # The memory row never carries an alert class.
    stat_rows.append(
        {
            "name": "Memory size",
            "value": summary["memory_size"],
            "fmt": "fmt_bytesize",
        }
    )
    table = Table(stat_rows)

    fqm = FrequencyTableSmall(mini_freq_table_rows)

    # TODO: settings 3,3,6
    template_variables["top"] = Sequence([info, table, fqm], sequence_type="grid")

    # Bottom
    tabs = [
        FrequencyTable(
            template_variables["freq_table_rows"],
            name="Common Values",
            anchor_id=f"{varid}common_values",
        )
    ]

    if config["vars"]["cat"]["check_composition"].get(bool):
        length_table = Table(
            [
                {"name": label, "value": summary[key], "fmt": "fmt_numeric"}
                for label, key in (
                    ("Max length", "max_length"),
                    ("Mean length", "mean_length"),
                    ("Min length", "min_length"),
                )
            ],
            name="Length",
            anchor_id=f"{varid}lengthstats",
        )

        histogram_bins = 10
        length_histogram = Image(
            histogram(summary["length"], summary, histogram_bins),
            image_format=image_format,
            alt="Scatter",
            name="Length",
            anchor_id=f"{varid}length",
        )

        tabs.append(
            Sequence(
                [length_histogram, length_table],
                anchor_id=f"{varid}tbl",
                name="Length",
                sequence_type="grid",
            )
        )

        max_rows = config["n_freq_table_max"].get(int)

        # (summary key, tab title, anchor suffix) for each character property.
        character_specs = (
            ("category_alias_values", "Categories", "category_long_values"),
            ("script_values", "Scripts", "script_values"),
            ("block_alias_values", "Blocks", "block_alias_values"),
        )
        character_tabs = []
        for key, title, slug in character_specs:
            counts = pd.Series(summary[key]).value_counts()
            character_tabs.append(
                FrequencyTable(
                    freq_table(
                        freqtable=counts,
                        n=counts.sum(),
                        max_number_to_print=max_rows,
                    ),
                    name=title,
                    anchor_id=f"{varid}{slug}",
                )
            )

        tabs.append(
            Sequence(
                character_tabs,
                name="Characters",
                sequence_type="tabs",
                anchor_id=f"{varid}characters",
            )
        )

    template_variables["bottom"] = Sequence(
        tabs,
        sequence_type="tabs",
        anchor_id=f"{varid}bottom",
    )

    return template_variables
def render_image(config: Settings, summary: dict) -> dict:
    """Extend the file rendering with an "Image" tab group.

    Starts from ``render_file(config, summary)``, relabels the variable
    type as "Image" and appends a tabbed container with an optional
    "Exif data" list (when ``"exif_keys_counts"`` is in ``summary``) and a
    "Dimensions" list (overview tables, scatter plot, common values).

    Args:
        config: settings object.
        summary: mapping with the statistics of the variable.

    Returns:
        The template variables dict produced by ``render_file``, extended
        in place.
    """
    varid = summary["varid"]
    max_rows = config.n_freq_table_max
    redact = config.vars.cat.redact

    template_variables = render_file(config, summary)

    # Top
    overview = template_variables["top"].content["items"][0]
    overview.content["var_type"] = "Image"

    # Bottom
    # One table per dimension (width, height, area), each listing the
    # min / median / max value. The original note on this layout states
    # that all dimension properties are in pixels.
    dimension_tables = [
        Table(
            [
                {
                    "name": f"{stat} {dimension}",
                    "value": fmt_numeric(
                        summary[f"{stat.lower()}_{dimension}"],
                        precision=config.report.precision,
                    ),
                    "alert": False,
                }
                for stat in ("Min", "Median", "Max")
            ]
        )
        for dimension in ("width", "height", "area")
    ]

    dimension_overview = Container(
        dimension_tables,
        anchor_id=f"{varid}tbl",
        name="Overview",
        sequence_type="grid",
    )
    dimension_scatter = Image(
        scatter_series(config, summary["image_dimensions"]),
        image_format=config.plot.image_format,
        alt="Scatter plot of image sizes",
        caption="Scatter plot of image sizes",
        name="Scatter plot",
        anchor_id=f"{varid}image_dimensions_scatter",
    )
    dimension_frequency = FrequencyTable(
        freq_table(
            freqtable=summary["image_dimensions"].value_counts(),
            n=summary["n"],
            max_number_to_print=max_rows,
        ),
        name="Common values",
        anchor_id=f"{varid}image_dimensions_frequency",
        redact=False,
    )

    image_shape = Container(
        [dimension_overview, dimension_scatter, dimension_frequency],
        sequence_type="named_list",
        name="Dimensions",
        anchor_id=f"{varid}image_dimensions",
    )

    image_items = []

    if "exif_keys_counts" in summary:
        exif_tables = [
            FrequencyTable(
                freq_table(
                    freqtable=pd.Series(summary["exif_keys_counts"]),
                    n=summary["n"],
                    max_number_to_print=max_rows,
                ),
                name="Exif keys",
                anchor_id=f"{varid}exif_keys",
                redact=redact,
            )
        ]
        # The "exif_keys" entry is skipped here; the keys table above is
        # built from summary["exif_keys_counts"] instead.
        exif_tables += [
            FrequencyTable(
                freq_table(
                    freqtable=counts,
                    n=summary["n"],
                    max_number_to_print=max_rows,
                ),
                name=key,
                anchor_id=f"{varid}_exif_{key}",
                redact=redact,
            )
            for key, counts in summary["exif_data"].items()
            if key != "exif_keys"
        ]

        image_items.append(
            Container(
                exif_tables,
                anchor_id=f"{varid}exif_data",
                name="Exif data",
                sequence_type="named_list",
            )
        )

    image_items.append(image_shape)

    template_variables["bottom"].content["items"].append(
        Container(
            image_items,
            name="Image",
            sequence_type="tabs",
            anchor_id=f"{varid}image",
        )
    )

    return template_variables
def render_categorical_unicode(config: Settings, summary: dict,
                               varid: str) -> Tuple[Renderable, Renderable]:
    """Build the Unicode overview table and the tabbed Unicode details.

    Args:
        config: settings object; ``n_freq_table_max`` and
            ``vars.cat.redact`` are read from it.
        summary: mapping with the character, category, script and block
            counts of the variable.
        varid: prefix used for every anchor id.

    Returns:
        A pair ``(overview_table, unicode_container)``. The container is a
        tab group with "Characters", "Categories", "Scripts" and "Blocks".
    """
    max_rows = config.n_freq_table_max
    redact_values = config.vars.cat.redact

    def _frequency(counts, total, title, anchor, redact):
        # Wrap one counts object into a rendered frequency table.
        return FrequencyTable(
            freq_table(
                freqtable=counts,
                n=total,
                max_number_to_print=max_rows,
            ),
            name=title,
            anchor_id=anchor,
            redact=redact,
        )

    def _by_size_desc(entry):
        # Sort key: entries with the longest counts object come first.
        return -len(entry[1])

    def _batch_grid(children, title, anchor):
        return Container(
            children,
            name=title,
            sequence_type="batch_grid",
            anchor_id=anchor,
            batch_size=2,
            subtitles=True,
        )

    # Categories
    category_counts = summary["category_alias_counts"]
    category_overview = _frequency(
        category_counts,
        category_counts.sum(),
        "Most occurring categories",
        f"{varid}category_long_values",
        False,
    )
    # Underscores are replaced by spaces in both the title and the anchor.
    display_categories = [
        (raw_name.replace("_", " "), counts)
        for raw_name, counts in sorted(
            summary["category_alias_char_counts"].items(), key=_by_size_desc
        )
    ]
    cats = [
        _frequency(
            counts,
            counts.sum(),
            f"{display_name}",
            f"{varid}category_alias_values_{display_name}",
            redact_values,
        )
        for display_name, counts in display_categories
    ]
    category_items = [
        category_overview,
        _batch_grid(
            cats, "Most frequent character per category", f"{varid}categories"
        ),
    ]

    # Scripts
    script_totals = summary["script_counts"]
    script_overview = _frequency(
        script_totals,
        script_totals.sum(),
        "Most occurring scripts",
        f"{varid}script_values",
        False,
    )
    scripts = [
        _frequency(
            counts,
            counts.sum(),
            f"{script_name}",
            f"{varid}script_values_{script_name}",
            redact_values,
        )
        for script_name, counts in sorted(
            summary["script_char_counts"].items(), key=_by_size_desc
        )
    ]
    script_items = [
        script_overview,
        _batch_grid(
            scripts, "Most frequent character per script", f"{varid}scripts"
        ),
    ]

    # Blocks (iterated in the mapping's own order, not sorted)
    block_totals = summary["block_alias_counts"]
    block_overview = _frequency(
        block_totals,
        block_totals.sum(),
        "Most occurring blocks",
        f"{varid}block_alias_values",
        False,
    )
    blocks = [
        _frequency(
            counts,
            counts.sum(),
            f"{block_name}",
            f"{varid}block_alias_values_{block_name}",
            redact_values,
        )
        for block_name, counts in summary["block_alias_char_counts"].items()
    ]
    block_items = [
        block_overview,
        _batch_grid(
            blocks, "Most frequent character per block", f"{varid}blocks"
        ),
    ]

    # Overview table
    overview_rows = [
        {
            "name": "Total characters",
            "value": fmt_number(summary["n_characters"]),
            "alert": False,
        },
        {
            "name": "Distinct characters",
            "value": fmt_number(summary["n_characters_distinct"]),
            "alert": False,
        },
        {
            "name": "Distinct categories",
            "value": f"{fmt_number(summary['n_category'])} {help(title='Unicode categories (click for more information)', url='https://en.wikipedia.org/wiki/Unicode_character_property#General_Category')}",
            "alert": False,
        },
        {
            "name": "Distinct scripts",
            "value": f"{fmt_number(summary['n_scripts'])} {help(title='Unicode scripts (click for more information)', url='https://en.wikipedia.org/wiki/Script_(Unicode)#List_of_scripts_in_Unicode')}",
            "alert": False,
        },
        {
            "name": "Distinct blocks",
            "value": f"{fmt_number(summary['n_block_alias'])} {help(title='Unicode blocks (click for more information)', url='https://en.wikipedia.org/wiki/Unicode_block')}",
            "alert": False,
        },
    ]
    overview_table = Table(
        overview_rows,
        name="Characters and Unicode",
        caption="The Unicode Standard assigns character properties to each code point, which can be used to analyse textual variables. ",
    )

    # Tabs
    character_table = _frequency(
        summary["character_counts"],
        summary["n_characters"],
        "Most occurring characters",
        f"{varid}character_frequency",
        redact_values,
    )
    tab_specs = (
        ([character_table], "Characters", "characters"),
        (category_items, "Categories", "categories"),
        (script_items, "Scripts", "scripts"),
        (block_items, "Blocks", "blocks"),
    )
    citems = [
        Container(
            children,
            name=title,
            anchor_id=f"{varid}{slug}",
            sequence_type="named_list",
        )
        for children, title, slug in tab_specs
    ]

    unicode_tabs = Container(
        citems,
        name="Unicode",
        sequence_type="tabs",
        anchor_id=f"{varid}unicode",
    )
    return overview_table, unicode_tabs
def render_categorical(config: Settings, summary: dict) -> dict:
    """Assemble the "top" and "bottom" sections for a categorical variable.

    Starts from ``render_common(config, summary)``. The top section is a
    grid of variable info, a statistics table and a small frequency table.
    The bottom section is a tab container with "Overview" and "Categories"
    tabs, plus "Words" and "Characters" tabs when the corresponding
    ``config.vars.cat`` switches are enabled.

    Args:
        config: settings object.
        summary: mapping with the statistics of the variable.

    Returns:
        The template variables dict, extended with ``"top"`` and
        ``"bottom"``.
    """
    varid = summary["varid"]
    cat_settings = config.vars.cat
    max_mini_rows = cat_settings.n_obs
    image_format = config.plot.image_format
    show_words = cat_settings.words
    show_characters = cat_settings.characters
    show_length = cat_settings.length

    template_variables = render_common(config, summary)

    # Top
    info = VariableInfo(
        varid,
        summary["varname"],
        "Categorical",
        summary["alerts"],
        summary["description"],
    )

    alert_fields = summary["alert_fields"]
    flagged_stats = (
        ("Distinct", "n_distinct", fmt),
        ("Distinct (%)", "p_distinct", fmt_percent),
        ("Missing", "n_missing", fmt),
        ("Missing (%)", "p_missing", fmt_percent),
    )
    stat_rows = [
        {
            "name": label,
            "value": formatter(summary[key]),
            "alert": key in alert_fields,
        }
        for label, key, formatter in flagged_stats
    ]
    stat_rows.append(
        {
            "name": "Memory size",
            "value": fmt_bytesize(summary["memory_size"]),
            "alert": False,
        }
    )
    table = Table(stat_rows)

    fqm = FrequencyTableSmall(
        freq_table(
            freqtable=summary["value_counts_without_nan"],
            n=summary["count"],
            max_number_to_print=max_mini_rows,
        ),
        redact=config.vars.cat.redact,
    )

    template_variables["top"] = Container([info, table, fqm],
                                          sequence_type="grid")

    # Bottom
    frequency_table = FrequencyTable(
        template_variables["freq_table_rows"],
        name="Common Values",
        anchor_id=f"{varid}common_values",
        redact=config.vars.cat.redact,
    )

    unique_stats = render_categorical_frequency(config, summary, varid)

    overview_items = []

    if show_length:
        length_table, length_histo = render_categorical_length(
            config, summary, varid)
        overview_items.append(length_table)

    if show_characters:
        overview_table_char, unitab = render_categorical_unicode(
            config, summary, varid)
        overview_items.append(overview_table_char)

    overview_items.append(unique_stats)

    # Sample rows are only shown when values are not redacted.
    if not config.vars.cat.redact:
        row_labels = ("1st row", "2nd row", "3rd row", "4th row", "5th row")
        sample_rows = [
            {"name": label, "value": fmt(value), "alert": False}
            for label, value in zip(row_labels, summary["first_rows"])
        ]
        overview_items.append(Table(sample_rows, name="Sample"))

    string_items: List[Renderable] = [frequency_table]
    if show_length:
        string_items.append(length_histo)

    max_unique = config.plot.pie.max_unique
    if max_unique > 0 and summary["n_distinct"] <= max_unique:
        pie_chart = Image(
            pie_plot(
                config,
                summary["value_counts_without_nan"],
                legend_kws={"loc": "upper right"},
            ),
            image_format=image_format,
            alt="Pie chart",
            name="Pie chart",
            anchor_id=f"{varid}pie_chart",
        )
        string_items.append(pie_chart)

    overview_tab = Container(
        overview_items,
        name="Overview",
        anchor_id=f"{varid}overview",
        sequence_type="batch_grid",
        batch_size=len(overview_items),
        titles=False,
    )
    categories_tab = Container(
        string_items,
        name="Categories",
        anchor_id=f"{varid}string",
        sequence_type="batch_grid",
        batch_size=len(string_items),
    )
    bottom_items = [overview_tab, categories_tab]

    if show_words:
        word_counts = summary["word_counts"]
        word_rows = freq_table(
            freqtable=word_counts,
            n=word_counts.sum(),
            max_number_to_print=10,
        )
        word_table = FrequencyTable(
            word_rows,
            name="Common words",
            anchor_id=f"{varid}cwo",
            redact=config.vars.cat.redact,
        )
        bottom_items.append(
            Container(
                [word_table],
                name="Words",
                anchor_id=f"{varid}word",
                sequence_type="grid",
            )
        )

    if show_characters:
        bottom_items.append(
            Container(
                [unitab],
                name="Characters",
                anchor_id=f"{varid}characters",
                sequence_type="grid",
            )
        )

    template_variables["bottom"] = Container(
        bottom_items,
        sequence_type="tabs",
        anchor_id=f"{varid}bottom",
    )

    return template_variables
def render_categorical_unicode(summary, varid, redact):
    """Build the tabbed "Unicode" container for a categorical variable.

    Reads the ``n_freq_table_max`` setting from the module-level ``config``.

    Args:
        summary: mapping with the character, category, script and block
            counts of the variable.
        varid: prefix used for every anchor id.
        redact: passed as ``redact`` to the per-character frequency tables;
            the "Most occurring ..." overview tables always use ``False``.

    Returns:
        A tab container with "Overview", "Characters", "Categories",
        "Scripts" and "Blocks".
    """
    max_rows = config["n_freq_table_max"].get(int)

    def _frequency(counts, title, anchor, hide_values):
        # Wrap one counts object into a rendered frequency table.
        return FrequencyTable(
            freq_table(
                freqtable=counts,
                n=counts.sum(),
                max_number_to_print=max_rows,
            ),
            name=title,
            anchor_id=anchor,
            redact=hide_values,
        )

    # Categories: underscores become spaces in both title and anchor.
    category_items = [
        _frequency(
            summary["category_alias_counts"],
            "Most occurring categories",
            f"{varid}category_long_values",
            False,
        )
    ]
    for raw_name, counts in summary["category_alias_char_counts"].items():
        display_name = raw_name.replace("_", " ")
        category_items.append(
            _frequency(
                counts,
                f"Most frequent {display_name} characters",
                f"{varid}category_alias_values_{display_name}",
                redact,
            )
        )

    # Scripts
    script_items = [
        _frequency(
            summary["script_counts"],
            "Most occurring scripts",
            f"{varid}script_values",
            False,
        )
    ]
    script_items += [
        _frequency(
            counts,
            f"Most frequent {script_name} characters",
            f"{varid}script_values_{script_name}",
            redact,
        )
        for script_name, counts in summary["script_char_counts"].items()
    ]

    # Blocks
    block_items = [
        _frequency(
            summary["block_alias_counts"],
            "Most occurring blocks",
            f"{varid}block_alias_values",
            False,
        )
    ]
    block_items += [
        _frequency(
            counts,
            f"Most frequent {block_name} characters",
            f"{varid}block_alias_values_{block_name}",
            redact,
        )
        for block_name, counts in summary["block_alias_char_counts"].items()
    ]

    # Overview
    overview_rows = [
        {
            "name": "Unique unicode characters",
            "value": summary["n_characters"],
            "fmt": "fmt_numeric",
            "alert": False,
        },
        {
            "name": "Unique unicode categories",
            "value": f"{summary['n_category']} {help(title='Unicode categories (click for more information)', url='https://en.wikipedia.org/wiki/Unicode_character_property#General_Category')}",
            "fmt": "raw",
            "alert": False,
        },
        {
            "name": "Unique unicode scripts",
            "value": f"{summary['n_scripts']} {help(title='Unicode scripts (click for more information)', url='https://en.wikipedia.org/wiki/Script_(Unicode)#List_of_scripts_in_Unicode')}",
            "fmt": "raw",
            "alert": False,
        },
        {
            "name": "Unique unicode blocks",
            "value": f"{summary['n_block_alias']} {help(title='Unicode blocks (click for more information)', url='https://en.wikipedia.org/wiki/Unicode_block')}",
            "fmt": "raw",
            "alert": False,
        },
    ]
    overview_tab = Container(
        [
            Table(
                overview_rows,
                name="Overview of Unicode Properties",
                caption="The Unicode Standard assigns character properties to each code point, which can be used to analyse textual variables. ",
            )
        ],
        anchor_id=f"{varid}character_overview",
        name="Overview",
        sequence_type="list",
    )

    character_table = _frequency(
        summary["character_counts"],
        "Most occurring characters",
        f"{varid}character_frequency",
        redact,
    )

    citems = [overview_tab]
    citems += [
        Container(
            children,
            name=title,
            anchor_id=f"{varid}{slug}",
            sequence_type="named_list",
        )
        for children, title, slug in (
            ([character_table], "Characters", "characters"),
            (category_items, "Categories", "categories"),
            (script_items, "Scripts", "scripts"),
            (block_items, "Blocks", "blocks"),
        )
    ]

    return Container(
        citems,
        name="Unicode",
        sequence_type="tabs",
        anchor_id=f"{varid}unicode",
    )
def render_boolean(config: Settings, summary: dict) -> dict:
    """Assemble the "top" and "bottom" sections for a boolean variable.

    Starts from ``render_common(config, summary)``. The top section is a
    grid of variable info, a statistics table and a small frequency table.
    The bottom section is a tab container with the common values and, when
    ``config.plot.pie.max_unique`` is positive, a pie chart.

    Args:
        config: settings object.
        summary: mapping with the statistics of the variable.

    Returns:
        The template variables dict, extended with ``"top"`` and
        ``"bottom"``.
    """
    varid = summary["varid"]
    max_mini_rows = config.vars.bool.n_obs
    image_format = config.plot.image_format

    template_variables = render_common(config, summary)

    # Top
    info = VariableInfo(
        anchor_id=varid,
        alerts=summary["alerts"],
        var_type="Boolean",
        var_name=summary["varname"],
        description=summary["description"],
    )

    alert_fields = summary["alert_fields"]
    flagged_stats = (
        ("Distinct", "n_distinct", fmt),
        ("Distinct (%)", "p_distinct", fmt_percent),
        ("Missing", "n_missing", fmt),
        ("Missing (%)", "p_missing", fmt_percent),
    )
    stat_rows = [
        {
            "name": label,
            "value": formatter(summary[key]),
            "alert": key in alert_fields,
        }
        for label, key, formatter in flagged_stats
    ]
    stat_rows.append(
        {
            "name": "Memory size",
            "value": fmt_bytesize(summary["memory_size"]),
            "alert": False,
        }
    )
    table = Table(stat_rows)

    mini_rows = freq_table(
        freqtable=summary["value_counts_without_nan"],
        n=summary["n"],
        max_number_to_print=max_mini_rows,
    )
    fqm = FrequencyTableSmall(mini_rows, redact=False)

    template_variables["top"] = Container([info, table, fqm], sequence_type="grid")

    # Bottom
    common_values = FrequencyTable(
        template_variables["freq_table_rows"],
        name="Common Values",
        anchor_id=f"{varid}frequency_table",
        redact=False,
    )
    items: List[Renderable] = [common_values]

    # A non-positive max_unique disables the pie chart.
    if config.plot.pie.max_unique > 0:
        chart = pie_plot(
            config,
            summary["value_counts_without_nan"],
            legend_kws={"loc": "upper right"},
        )
        items.append(
            Image(
                chart,
                image_format=image_format,
                alt="Chart",
                name="Chart",
                anchor_id=f"{varid}pie_chart",
            )
        )

    template_variables["bottom"] = Container(
        items, sequence_type="tabs", anchor_id=f"{varid}bottom"
    )

    return template_variables
def render_boolean(summary):
    """Assemble the "top" and "bottom" sections for a boolean variable.

    Starts from ``render_common(summary)`` and reads its settings from the
    module-level ``config``. The top section is a grid of variable info, a
    statistics table and a small frequency table. The bottom section is a
    tab container with the common values and, when the
    ``plot.pie.max_unique`` setting is positive, a pie chart.

    Args:
        summary: mapping with the statistics of the variable.

    Returns:
        The template variables dict, extended with ``"top"`` and
        ``"bottom"``.
    """
    varid = summary["varid"]
    max_mini_rows = config["vars"]["bool"]["n_obs"].get(int)
    image_format = config["plot"]["image_format"].get(str)

    template_variables = render_common(summary)

    # Top
    info = VariableInfo(
        anchor_id=varid,
        warnings=summary["warnings"],
        var_type="Boolean",
        var_name=summary["varname"],
        description=summary["description"],
    )

    warn_fields = summary["warn_fields"]
    flagged_stats = (
        ("Distinct", "n_distinct", "fmt"),
        ("Distinct (%)", "p_distinct", "fmt_percent"),
        ("Missing", "n_missing", "fmt"),
        ("Missing (%)", "p_missing", "fmt_percent"),
    )
    stat_rows = [
        {
            "name": label,
            "value": summary[key],
            "fmt": formatter,
            "alert": key in warn_fields,
        }
        for label, key, formatter in flagged_stats
    ]
    stat_rows.append(
        {
            "name": "Memory size",
            "value": summary["memory_size"],
            "fmt": "fmt_bytesize",
            "alert": False,
        }
    )
    table = Table(stat_rows)

    mini_rows = freq_table(
        freqtable=summary["value_counts"],
        n=summary["n"],
        max_number_to_print=max_mini_rows,
    )
    fqm = FrequencyTableSmall(mini_rows, redact=False)

    template_variables["top"] = Container([info, table, fqm],
                                          sequence_type="grid")

    # Bottom
    items = [
        FrequencyTable(
            template_variables["freq_table_rows"],
            name="Common Values",
            anchor_id=f"{varid}frequency_table",
            redact=False,
        )
    ]

    # A non-positive max_unique disables the pie chart.
    if config["plot"]["pie"]["max_unique"].get(int) > 0:
        chart = pie_plot(summary["value_counts"],
                         legend_kws={"loc": "upper right"})
        items.append(
            Image(
                chart,
                image_format=image_format,
                alt="Chart",
                name="Chart",
                anchor_id=f"{varid}pie_chart",
            )
        )

    template_variables["bottom"] = Container(
        items,
        sequence_type="tabs",
        anchor_id=f"{varid}bottom",
    )

    return template_variables
def render_categorical_unicode(summary, varid, redact):
    """Build the "Unicode" tab group for a categorical variable.

    Args:
        summary: Summary of the variable. The keys read here are the
            ``*_counts`` / ``*_char_counts`` entries for characters,
            categories, scripts and blocks, plus ``n_characters``,
            ``n_category``, ``n_scripts`` and ``n_block_alias``.
        varid: Prefix used for every anchor id created here.
        redact: Redaction flag forwarded to the per-character frequency
            tables; the aggregate tables are always built with
            ``redact=False``.

    Returns:
        A tabs ``Container`` holding the overview, character, category,
        script and block sections.
    """
    row_limit = config["n_freq_table_max"].get(int)

    def frequency_tab(counts, title, anchor, redacted):
        # Each table is normalised by the total of its own counts.
        rows = freq_table(
            freqtable=counts,
            n=counts.sum(),
            max_number_to_print=row_limit,
        )
        return FrequencyTable(
            rows, name=title, anchor_id=anchor, redact=redacted
        )

    category_tabs = [
        frequency_tab(
            summary["category_alias_counts"],
            "最常见分类",
            f"{varid}category_long_values",
            False,
        )
    ]
    for raw_name, counts in summary["category_alias_char_counts"].items():
        # The spaced form of the alias is used in both title and anchor id.
        label = raw_name.replace("_", " ")
        category_tabs.append(
            frequency_tab(
                counts,
                f"最常见字符 {label} ",
                f"{varid}category_alias_values_{label}",
                redact,
            )
        )

    script_tabs = [
        frequency_tab(
            summary["script_counts"],
            "最常见值",
            f"{varid}script_values",
            False,
        )
    ]
    script_tabs.extend(
        frequency_tab(
            counts,
            f"最常见 {script} 字符",
            f"{varid}script_values_{script}",
            redact,
        )
        for script, counts in summary["script_char_counts"].items()
    )

    block_tabs = [
        frequency_tab(
            summary["block_alias_counts"],
            "最常见区段",
            f"{varid}block_alias_values",
            False,
        )
    ]
    block_tabs.extend(
        frequency_tab(
            counts,
            f"最频繁 {block} 字符",
            f"{varid}block_alias_values_{block}",
            redact,
        )
        for block, counts in summary["block_alias_char_counts"].items()
    )

    overview_rows = [
        {
            "name": label,
            "value": summary[field],
            "fmt": "fmt_numeric",
            "alert": False,
        }
        for label, field in (
            ("字符", "n_characters"),
            (
                '类别 (<a target="_blank" href="https://en.wikipedia.org/wiki/Unicode_character_property#General_Category">?</a>)',
                "n_category",
            ),
            (
                '书写系统 (<a target="_blank" href="https://en.wikipedia.org/wiki/Script_(Unicode)#List_of_scripts_in_Unicode">?</a>)',
                "n_scripts",
            ),
            (
                '区段 (<a target="_blank" href="https://en.wikipedia.org/wiki/Unicode_block">?</a>)',
                "n_block_alias",
            ),
        )
    ]
    overview_section = Container(
        [
            Table(
                overview_rows,
                name="Unicode属性概述",
                caption="Unicode标准为每个字符提供了唯一的数字编号(code point),可以用来分析文本变量。",
            ),
        ],
        anchor_id=f"{varid}character_overview",
        name="概要",
        sequence_type="list",
    )

    character_section = Container(
        [
            frequency_tab(
                summary["character_counts"],
                "最常出现的字符",
                f"{varid}character_frequency",
                redact,
            ),
        ],
        name="字符",
        anchor_id=f"{varid}characters",
        sequence_type="named_list",
    )

    sections = [overview_section, character_section]
    sections.append(
        Container(
            category_tabs,
            name="分类",
            anchor_id=f"{varid}categories",
            sequence_type="named_list",
        )
    )
    sections.append(
        Container(
            script_tabs,
            name="书写系统",
            anchor_id=f"{varid}scripts",
            sequence_type="named_list",
        )
    )
    sections.append(
        Container(
            block_tabs,
            name="区段",
            anchor_id=f"{varid}blocks",
            sequence_type="named_list",
        )
    )

    return Container(
        sections,
        name="Unicode",
        sequence_type="tabs",
        anchor_id=f"{varid}unicode",
    )
def render_categorical(summary):
    """Build the template variables used to render a categorical variable.

    Settings come from the module-level ``config`` object: ``vars.cat``
    (``n_obs``, ``redact``, ``length``, ``unicode``), ``plot.image_format``
    and ``plot.pie.max_unique``.

    Args:
        summary: Summary of the variable. The keys read here include
            ``varid``, ``varname``, ``description``, ``warnings``,
            ``warn_fields``, ``count``, ``n_unique`` and ``value_counts``.

    Returns:
        The mapping returned by ``render_common`` with two extra entries:
        ``"top"`` (a grid container) and ``"bottom"`` (a tabs container).
    """
    anchor_prefix = summary["varid"]
    cat_settings = config["vars"]["cat"]
    small_table_limit = cat_settings["n_obs"].get(int)
    chart_format = config["plot"]["image_format"].get(str)
    redact = cat_settings["redact"].get(bool)

    variables = render_common(summary)

    header = VariableInfo(
        summary["varid"],
        summary["varname"],
        "分类变量",
        summary["warnings"],
        summary["description"],
    )

    # A row is marked as an alert when its key is in summary["warn_fields"].
    stat_rows = [
        {
            "name": label,
            "value": summary[field],
            "fmt": formatter_name,
            "alert": field in summary["warn_fields"],
        }
        for label, field, formatter_name in (
            ("唯一值计数", "n_unique", "fmt"),
            ("唯一值比例 (%)", "p_unique", "fmt_percent"),
            ("缺失值", "n_missing", "fmt"),
            ("缺失值比例(%)", "p_missing", "fmt_percent"),
        )
    ]
    # The memory row never raises an alert.
    stat_rows.append(
        {
            "name": "内存占用",
            "value": summary["memory_size"],
            "fmt": "fmt_bytesize",
            "alert": False,
        }
    )
    statistics = Table(stat_rows)

    small_rows = freq_table(
        freqtable=summary["value_counts"],
        n=summary["count"],
        max_number_to_print=small_table_limit,
    )
    small_frequencies = FrequencyTableSmall(small_rows, redact=redact)

    variables["top"] = Container(
        [header, statistics, small_frequencies], sequence_type="grid"
    )

    # Bottom: the common-values tab is always present, the rest is optional.
    tabs = [
        FrequencyTable(
            variables["freq_table_rows"],
            name="常见值",
            anchor_id=f"{anchor_prefix}common_values",
            redact=redact,
        )
    ]

    # The pie chart needs a positive limit that n_unique does not exceed.
    pie_limit = config["plot"]["pie"]["max_unique"].get(int)
    if pie_limit > 0 and summary["n_unique"] <= pie_limit:
        chart = pie_plot(
            summary["value_counts"], legend_kws={"loc": "upper right"}
        )
        tabs.append(
            Image(
                chart,
                image_format=chart_format,
                alt="Chart",
                name="图表",
                anchor_id=f"{anchor_prefix}pie_chart",
            )
        )

    if cat_settings["length"].get(bool):
        tabs.append(
            render_categorical_length(summary, anchor_prefix, chart_format)
        )

    if cat_settings["unicode"].get(bool):
        tabs.append(
            render_categorical_unicode(summary, anchor_prefix, redact)
        )

    variables["bottom"] = Container(
        tabs, sequence_type="tabs", anchor_id=f"{anchor_prefix}bottom"
    )
    return variables
def render_categorical(summary):
    """Build the template variables used to render a categorical variable.

    Settings come from the module-level ``config`` object: ``vars.cat``
    (``n_obs``, ``length``, ``unicode``), ``plot.image_format`` and, for
    the Unicode tab only, ``n_freq_table_max``.

    Args:
        summary: Summary of the variable. The keys read here include
            ``varid``, ``varname``, ``description``, ``warnings``,
            ``warn_fields``, ``count`` and ``value_counts``, plus the
            length and Unicode statistics when those tabs are enabled.

    Returns:
        The mapping returned by ``render_common`` with two extra entries:
        ``"top"`` (a grid container) and ``"bottom"`` (a tabs container).
    """
    anchor_prefix = summary["varid"]
    cat_settings = config["vars"]["cat"]
    small_table_limit = cat_settings["n_obs"].get(int)
    chart_format = config["plot"]["image_format"].get(str)

    variables = render_common(summary)

    # TODO: merge with boolean
    small_rows = freq_table(
        freqtable=summary["value_counts"],
        n=summary["count"],
        max_number_to_print=small_table_limit,
    )

    def build_length_tab():
        # Grid with the length histogram next to the length statistics.
        length_rows = [
            {
                "name": label,
                "value": summary[field],
                "fmt": "fmt_numeric",
                "alert": False,
            }
            for label, field in (
                ("Max length", "max_length"),
                ("Median length", "median_length"),
                ("Mean length", "mean_length"),
                ("Min length", "min_length"),
            )
        ]
        length_statistics = Table(
            length_rows,
            name="Length",
            anchor_id=f"{anchor_prefix}lengthstats",
        )

        bin_count = 10
        length_histogram = Image(
            histogram(summary["length"], summary, bin_count),
            image_format=chart_format,
            alt="Scatter",
            name="Length",
            anchor_id=f"{anchor_prefix}length",
        )

        return Container(
            [length_histogram, length_statistics],
            anchor_id=f"{anchor_prefix}tbl",
            name="Length",
            sequence_type="grid",
        )

    def build_unicode_tab():
        # Tabs for the overview, characters, categories, scripts and blocks.
        row_limit = config["n_freq_table_max"].get(int)

        def frequency_tab(counts, title, anchor):
            # Each table is normalised by the total of its own counts.
            rows = freq_table(
                freqtable=counts,
                n=counts.sum(),
                max_number_to_print=row_limit,
            )
            return FrequencyTable(rows, name=title, anchor_id=anchor)

        category_tabs = [
            frequency_tab(
                summary["category_alias_counts"],
                "Most occurring categories",
                f"{anchor_prefix}category_long_values",
            )
        ]
        for raw_name, counts in summary["category_alias_char_counts"].items():
            # The spaced form of the alias is used in title and anchor id.
            label = raw_name.replace("_", " ")
            category_tabs.append(
                frequency_tab(
                    counts,
                    f"Most frequent {label} characters",
                    f"{anchor_prefix}category_alias_values_{label}",
                )
            )

        script_tabs = [
            frequency_tab(
                summary["script_counts"],
                "Most occurring scripts",
                f"{anchor_prefix}script_values",
            )
        ]
        script_tabs.extend(
            frequency_tab(
                counts,
                f"Most frequent {script} characters",
                f"{anchor_prefix}script_values_{script}",
            )
            for script, counts in summary["script_char_counts"].items()
        )

        block_tabs = [
            frequency_tab(
                summary["block_alias_counts"],
                "Most occurring blocks",
                f"{anchor_prefix}block_alias_values",
            )
        ]
        block_tabs.extend(
            frequency_tab(
                counts,
                f"Most frequent {block} characters",
                f"{anchor_prefix}block_alias_values_{block}",
            )
            for block, counts in summary["block_alias_char_counts"].items()
        )

        overview_rows = [
            {
                "name": label,
                "value": summary[field],
                "fmt": "fmt_numeric",
                "alert": False,
            }
            for label, field in (
                ("Unique unicode characters", "n_characters"),
                (
                    'Unique unicode categories (<a target="_blank" href="https://en.wikipedia.org/wiki/Unicode_character_property#General_Category">?</a>)',
                    "n_category",
                ),
                (
                    'Unique unicode scripts (<a target="_blank" href="https://en.wikipedia.org/wiki/Script_(Unicode)#List_of_scripts_in_Unicode">?</a>)',
                    "n_scripts",
                ),
                (
                    'Unique unicode blocks (<a target="_blank" href="https://en.wikipedia.org/wiki/Unicode_block">?</a>)',
                    "n_block_alias",
                ),
            )
        ]
        overview_section = Container(
            [
                Table(
                    overview_rows,
                    name="Overview of Unicode Properties",
                    caption="The Unicode Standard assigns character properties to each code point, which can be used to analyse textual variables. ",
                ),
            ],
            anchor_id=f"{anchor_prefix}character_overview",
            name="Overview",
            sequence_type="list",
        )

        character_section = Container(
            [
                frequency_tab(
                    summary["character_counts"],
                    "Most occurring characters",
                    f"{anchor_prefix}character_frequency",
                ),
            ],
            name="Characters",
            anchor_id=f"{anchor_prefix}characters",
            sequence_type="named_list",
        )

        sections = [
            overview_section,
            character_section,
            Container(
                category_tabs,
                name="Categories",
                anchor_id=f"{anchor_prefix}categories",
                sequence_type="named_list",
            ),
            Container(
                script_tabs,
                name="Scripts",
                anchor_id=f"{anchor_prefix}scripts",
                sequence_type="named_list",
            ),
            Container(
                block_tabs,
                name="Blocks",
                anchor_id=f"{anchor_prefix}blocks",
                sequence_type="named_list",
            ),
        ]
        return Container(
            sections,
            name="Unicode",
            sequence_type="tabs",
            anchor_id=f"{anchor_prefix}unicode",
        )

    # Top
    header = VariableInfo(
        summary["varid"],
        summary["varname"],
        "Categorical",
        summary["warnings"],
        summary["description"],
    )

    # A row is marked as an alert when its key is in summary["warn_fields"].
    stat_rows = [
        {
            "name": label,
            "value": summary[field],
            "fmt": formatter_name,
            "alert": field in summary["warn_fields"],
        }
        for label, field, formatter_name in (
            ("Distinct count", "n_unique", "fmt"),
            ("Unique (%)", "p_unique", "fmt_percent"),
            ("Missing", "n_missing", "fmt"),
            ("Missing (%)", "p_missing", "fmt_percent"),
        )
    ]
    # The memory row never raises an alert.
    stat_rows.append(
        {
            "name": "Memory size",
            "value": summary["memory_size"],
            "fmt": "fmt_bytesize",
            "alert": False,
        }
    )
    statistics = Table(stat_rows)

    small_frequencies = FrequencyTableSmall(small_rows)

    # TODO: settings 3,3,6
    variables["top"] = Container(
        [header, statistics, small_frequencies], sequence_type="grid"
    )

    # Bottom: the common-values tab is always present, the rest is optional.
    tabs = [
        FrequencyTable(
            variables["freq_table_rows"],
            name="Common Values",
            anchor_id=f"{anchor_prefix}common_values",
        )
    ]

    if cat_settings["length"].get(bool):
        tabs.append(build_length_tab())

    if cat_settings["unicode"].get(bool):
        tabs.append(build_unicode_tab())

    variables["bottom"] = Container(
        tabs, sequence_type="tabs", anchor_id=f"{anchor_prefix}bottom"
    )
    return variables
def render_url(summary):
    """Build the template variables used to render a URL variable.

    Settings come from the module-level ``config`` object
    (``n_freq_table_max``, ``vars.cat.n_obs``, ``vars.cat.redact``).

    Args:
        summary: Summary of the variable. The keys read here include
            ``varid``, ``varname``, ``description``, ``warnings``,
            ``warn_fields``, ``n``, ``value_counts`` and one
            ``<part>_counts`` entry per URL part.

    Returns:
        The mapping returned by ``render_common`` extended with one
        ``freqtable_<part>`` entry per URL part, a ``"bottom"`` tabs
        container and a ``"top"`` grid container.
    """
    anchor_prefix = summary["varid"]
    row_limit = config["n_freq_table_max"].get(int)
    small_table_limit = config["vars"]["cat"]["n_obs"].get(int)
    redact = config["vars"]["cat"]["redact"].get(bool)

    variables = render_common(summary)

    # URL part -> tab title, in display order.
    part_titles = {
        "scheme": "Scheme",
        "netloc": "Netloc",
        "path": "Path",
        "query": "Query",
        "fragment": "Fragment",
    }
    for part in part_titles:
        variables[f"freqtable_{part}"] = freq_table(
            freqtable=summary[f"{part}_counts"],
            n=summary["n"],
            max_number_to_print=row_limit,
        )

    # The full-URL table comes first, followed by one tab per URL part.
    tab_specs = [("freq_table_rows", "Full", "full")]
    tab_specs += [
        (f"freqtable_{part}", title, part)
        for part, title in part_titles.items()
    ]
    tabs = [
        FrequencyTable(
            variables[rows_key],
            name=title,
            anchor_id=f"{anchor_prefix}{slug}_frequency",
            redact=redact,
        )
        for rows_key, title, slug in tab_specs
    ]

    variables["bottom"] = Container(
        tabs,
        sequence_type="tabs",
        name="url stats",
        anchor_id=f"{anchor_prefix}urlstats",
    )

    # Element composition
    header = VariableInfo(
        summary["varid"],
        summary["varname"],
        "URL",
        summary["warnings"],
        summary["description"],
    )

    # A row is marked as an alert when its key is in summary["warn_fields"].
    stat_rows = [
        {
            "name": label,
            "value": summary[field],
            "fmt": formatter_name,
            "alert": field in summary["warn_fields"],
        }
        for label, field, formatter_name in (
            ("Distinct", "n_distinct", "fmt"),
            ("Distinct (%)", "p_distinct", "fmt_percent"),
            ("Missing", "n_missing", "fmt"),
            ("Missing (%)", "p_missing", "fmt_percent"),
        )
    ]
    # The memory row never raises an alert.
    stat_rows.append(
        {
            "name": "Memory size",
            "value": summary["memory_size"],
            "fmt": "fmt_bytesize",
            "alert": False,
        }
    )
    statistics = Table(stat_rows)

    small_rows = freq_table(
        freqtable=summary["value_counts"],
        n=summary["n"],
        max_number_to_print=small_table_limit,
    )
    small_frequencies = FrequencyTableSmall(small_rows, redact=redact)

    variables["top"] = Container(
        [header, statistics, small_frequencies], sequence_type="grid"
    )
    return variables
def render_boolean(summary):
    """Build the template variables used to render a boolean variable.

    The small-table row limit comes from ``vars.bool.n_obs`` on the
    module-level ``config`` object.

    Args:
        summary: Summary of the variable. The keys read here include
            ``varid``, ``varname``, ``warnings``, ``warn_fields``, ``n``
            and ``value_counts``.

    Returns:
        The mapping returned by ``render_common`` with two extra entries:
        ``"top"`` (a grid container) and ``"bottom"`` (a tabs container
        holding the frequency table).
    """
    anchor_prefix = summary["varid"]
    small_table_limit = config["vars"]["bool"]["n_obs"].get(int)

    # Start from the variables shared by all variable types.
    variables = render_common(summary)

    small_rows = freq_table(
        freqtable=summary["value_counts"],
        n=summary["n"],
        max_number_to_print=small_table_limit,
    )

    header = VariableInfo(
        anchor_id=summary["varid"],
        warnings=summary["warnings"],
        var_type="Boolean",
        var_name=summary["varname"],
    )

    # A row is marked as an alert when its key is in summary["warn_fields"].
    stat_rows = [
        {
            "name": label,
            "value": summary[field],
            "fmt": formatter_name,
            "alert": field in summary["warn_fields"],
        }
        for label, field, formatter_name in (
            ("Distinct count", "n_unique", "fmt"),
            ("Unique (%)", "p_unique", "fmt_percent"),
            ("Missing", "n_missing", "fmt"),
            ("Missing (%)", "p_missing", "fmt_percent"),
        )
    ]
    # The memory row never raises an alert.
    stat_rows.append(
        {
            "name": "Memory size",
            "value": summary["memory_size"],
            "fmt": "fmt_bytesize",
            "alert": False,
        }
    )
    statistics = Table(stat_rows)

    small_frequencies = FrequencyTableSmall(small_rows)

    variables["top"] = Container(
        [header, statistics, small_frequencies], sequence_type="grid"
    )

    full_frequencies = FrequencyTable(
        variables["freq_table_rows"],
        name="Frequency Table",
        anchor_id=f"{anchor_prefix}frequency_table",
    )

    variables["bottom"] = Container(
        [full_frequencies],
        sequence_type="tabs",
        anchor_id=f"{anchor_prefix}bottom",
    )
    return variables
def render_url(summary):
    """Build the template variables used to render a URL variable.

    Settings come from the module-level ``config`` object
    (``n_freq_table_max`` and ``vars.cat.n_obs``).

    Args:
        summary: Summary of the variable. The keys read here include
            ``varid``, ``varname``, ``warnings``, ``n``, ``value_counts``
            and one ``<part>_counts`` entry per URL part.

    Returns:
        The mapping returned by ``render_common`` extended with one
        ``freqtable_<part>`` entry per URL part, a ``"bottom"`` tabs
        sequence and a ``"top"`` grid sequence.
    """
    row_limit = config["n_freq_table_max"].get(int)
    small_table_limit = config["vars"]["cat"]["n_obs"].get(int)

    # TODO: merge with boolean/categorical
    small_rows = freq_table(
        freqtable=summary["value_counts"],
        n=summary["n"],
        max_number_to_print=small_table_limit,
    )

    variables = render_common(summary)

    # URL part -> tab title, in display order.
    part_titles = {
        "scheme": "Scheme",
        "netloc": "Netloc",
        "path": "Path",
        "query": "Query",
        "fragment": "Fragment",
    }
    for part in part_titles:
        variables[f"freqtable_{part}"] = freq_table(
            freqtable=summary[f"{part}_counts"],
            n=summary["n"],
            max_number_to_print=row_limit,
        )

    # The full-URL table comes first, followed by one tab per URL part.
    anchor_prefix = summary["varid"]
    tab_specs = [("freq_table_rows", "Full", "full")]
    tab_specs += [
        (f"freqtable_{part}", title, part)
        for part, title in part_titles.items()
    ]
    tabs = [
        FrequencyTable(
            variables[rows_key],
            name=title,
            anchor_id=f"{anchor_prefix}{slug}_frequency",
        )
        for rows_key, title, slug in tab_specs
    ]

    variables["bottom"] = Sequence(tabs, sequence_type="tabs")

    # Element composition
    header = Overview(
        summary["varid"], summary["varname"], "URL", summary["warnings"]
    )

    # These rows carry no "alert" key.
    statistics = Table(
        [
            {"name": label, "value": summary[field], "fmt": formatter_name}
            for label, field, formatter_name in (
                ("Distinct count", "n_unique", "fmt"),
                ("Unique (%)", "p_unique", "fmt_percent"),
                ("Missing", "n_missing", "fmt"),
                ("Missing (%)", "p_missing", "fmt_percent"),
                ("Memory size", "memory_size", "fmt_bytesize"),
            )
        ]
    )

    small_frequencies = FrequencyTableSmall(small_rows)

    # TODO: settings 3,3,6
    variables["top"] = Sequence(
        [header, statistics, small_frequencies], sequence_type="grid"
    )
    return variables
def render_path(summary):
    """Build the template variables used to render a path variable.

    Starts from the result of ``render_categorical``, relabels the
    variable type as "Path" and appends a "Path" tab group to the
    existing bottom container. Settings come from the module-level
    ``config`` object (``n_freq_table_max`` and ``vars.cat.redact``).

    Args:
        summary: Summary of the variable. The keys read here include
            ``varid``, ``n``, ``common_prefix``, one ``<part>_counts``
            and one ``n_<part>_unique`` entry per path part.

    Returns:
        The mapping returned by ``render_categorical`` extended with one
        ``freqtable_<part>`` entry per path part; its ``"top"`` and
        ``"bottom"`` entries are modified in place.
    """
    anchor_prefix = summary["varid"]
    row_limit = config["n_freq_table_max"].get(int)
    redact = config["vars"]["cat"]["redact"].get(bool)

    variables = render_categorical(summary)

    for part in ("name", "parent", "suffix", "stem", "anchor"):
        variables[f"freqtable_{part}"] = freq_table(
            freqtable=summary[f"{part}_counts"],
            n=summary["n"],
            max_number_to_print=row_limit,
        )

    # Top: overwrite the variable type on the first item of the top grid.
    top_items = variables["top"].content["items"]
    top_items[0].content["var_type"] = "Path"

    # Bottom
    overview_rows = [
        {
            "name": label,
            "value": summary[field],
            "fmt": formatter_name,
            "alert": False,
        }
        for label, field, formatter_name in (
            ("Common prefix", "common_prefix", "fmt"),
            ("Unique stems", "n_stem_unique", "fmt_numeric"),
            ("Unique names", "n_name_unique", "fmt_numeric"),
            ("Unique extensions", "n_suffix_unique", "fmt_numeric"),
            ("Unique directories", "n_parent_unique", "fmt_numeric"),
            ("Unique anchors", "n_anchor_unique", "fmt_numeric"),
        )
    ]
    overview_tab = Container(
        [Table(overview_rows)],
        anchor_id=f"{anchor_prefix}tbl",
        name="Overview",
        sequence_type="list",
    )

    # (rows key, tab title, anchor slug) in display order.
    tab_specs = (
        ("freq_table_rows", "Full", "full"),
        ("freqtable_stem", "Stem", "stem"),
        ("freqtable_name", "Name", "name"),
        ("freqtable_suffix", "Extension", "suffix"),
        ("freqtable_parent", "Parent", "parent"),
        ("freqtable_anchor", "Anchor", "anchor"),
    )
    path_tabs = [overview_tab]
    path_tabs.extend(
        FrequencyTable(
            variables[rows_key],
            name=title,
            anchor_id=f"{anchor_prefix}{slug}_frequency",
            redact=redact,
        )
        for rows_key, title, slug in tab_specs
    )

    path_group = Container(
        path_tabs,
        name="Path",
        sequence_type="tabs",
        anchor_id=f"{anchor_prefix}path",
    )

    variables["bottom"].content["items"].append(path_group)
    return variables