def _get_metrics_str_for_product(self, metric_keys: Set[DagKey]) -> str:
        """Builds the Metrics string for the product markdown file. Creates a table of
        necessary metric types and whether a state calculates those metrics.

        Args:
            metric_keys: keys of the Dataflow metric tables this product needs.

        Returns:
            Markdown text: a fixed header followed by either a short
            "no metrics" note or a metric-by-state table.
        """
        # Fixed preamble shown above the table (or above the "no metrics" note).
        metrics_header = (
            "##METRICS\n_All metrics required to support this product and"
            " whether or not each state regularly calculates the metric._"
            "\n\n** DISCLAIMER **\nThe presence of all required metrics"
            " for a state does not guarantee that this product is ready to"
            " launch in that state.\n\n")

        # Products with no Dataflow-backed metrics get a note instead of a table.
        if not metric_keys:
            return (metrics_header +
                    "*This product does not rely on Dataflow metrics.*\n")
        # One table column per pipeline-enabled state, sorted by state code.
        state_codes = sorted(self._get_dataflow_pipeline_enabled_states(),
                             key=lambda code: code.value)

        headers = ["**Metric**"] + [
            f"**{state_code.value}**" for state_code in state_codes
        ]

        # One row per metric (ordered by table id). The first cell links to the
        # metric's markdown page; each state cell is "X" when that state's
        # calculations include the metric, empty otherwise.
        table_matrix = [[
            f"[{DATAFLOW_TABLES_TO_METRIC_TYPES[metric_key.table_id].value}](../../metrics/{self.generic_types_by_metric_name[metric_key.table_id].lower()}/{metric_key.table_id}.md)"
        ] + [
            "X"
            if DATAFLOW_TABLES_TO_METRIC_TYPES[metric_key.table_id].value in [
                metric.name for metric in self.metric_calculations_by_state[
                    str(state_code.get_state())]
            ] else "" for state_code in state_codes
        ] for metric_key in sorted(metric_keys,
                                   key=lambda dag_key: dag_key.table_id)]

        writer = MarkdownTableWriter(headers=headers,
                                     value_matrix=table_matrix,
                                     margin=0)
        return metrics_header + writer.dumps()
示例#2
0
 def event_report(self):
     """Render each event list in the container as a Markdown table and
     return all tables concatenated into one report string."""
     # TODO: Use pandas dataframe's feature
     _all_reports = ''
     # Order event groups by the length of their attribute name.
     all_events = collections.OrderedDict(
         sorted(self._container.events.items(), key=lambda t: len(t[0])))
     for attr, event_list in all_events.items():
         writer = MarkdownTableWriter()
         writer.title = attr
         # Skip empty groups; the first event's keys define the columns.
         if len(event_list) == 0:
             continue
         else:
             keys = list(event_list[0].keys())
         values = []
         writer.headers = keys
         for event in event_list:
             line = []
             for k in keys:
                 event_val = event[k]
                 # Convert numpy scalars/arrays to plain Python values first.
                 if isinstance(event_val, (np.ndarray, np.generic)):
                     event_val = event_val.tolist()
                 if isinstance(event_val, int):
                     line.append(event_val)
                 elif isinstance(event_val, float):
                     line.append(event_val)
                 elif isinstance(event_val, list):
                     # Homogeneous int/float lists become comma-joined strings
                     # (floats fixed to 5 decimal places).
                     if all(isinstance(n, int) for n in event_val):
                         line.append(', '.join(str(v) for v in event_val))
                     elif all(isinstance(n, float) for n in event_val):
                         line.append(', '.join('{0:.5f}'.format(v)
                                               for v in event_val))
             # NOTE(review): values of any other type (e.g. str, or mixed-type
             # lists) are silently dropped, which shifts the remaining cells of
             # this row left relative to the headers — confirm this is intended.
             values.append(line)
         writer.value_matrix = values
         _all_reports += writer.dumps()
     return _all_reports
    def _get_fields(fields: List[sqlalchemy.Column]) -> str:
        """Render a Markdown table describing each entity field.

        Returns "No Fields" when *fields* is None and "<No columns>" when the
        list is empty. Raises ValueError if any field lacks a comment.
        """
        if fields is None:
            return "No Fields"
        if not fields:
            return "<No columns>"

        def _type_cell(column: sqlalchemy.Column) -> str:
            # Enum columns list every allowed value; others show the
            # upper-cased Python type name.
            if hasattr(column.type, "enums"):
                options = '<br />'.join(f'{e}' for e in column.type.enums)
                return f"ENUM: <br />{options}"
            return column.type.python_type.__name__.upper()

        rows = []
        for column in fields:
            if column.comment is None:
                raise ValueError(
                    f"Every entity field must have an associated comment. "
                    f"Field {column.name} has no comment.")
            rows.append([column.name, column.comment, _type_cell(column)])

        return MarkdownTableWriter(
            headers=[
                "Entity Field",
                "Entity Description",
                "Entity Type",
            ],
            value_matrix=rows,
            margin=0,
        ).dumps()
示例#4
0
def game_after_survival_training_no_interrupt(repeat_times):
    """Play `repeat_times` games after survival training (no interrupts) and
    log the per-run stats plus their column averages as a Markdown table.

    Args:
        repeat_times: number of games to run.
    """
    move_counts = 12000
    data = [['Lives Consumed', 'Moves', 'Ratio']]
    for count in range(repeat_times):
        # mp = moves played, lil = lives consumed (sc is unused here).
        mp, sc, lil = curses.wrapper(draw_menu_after_survival_no_interrupt,
                                     move_counts)
        ratioo = float(lil) / float(mp)
        data.append([lil, mp, ratioo])
    data_np = pd.DataFrame(data)
    data_np_lack = pd.DataFrame(data[1:])
    # BUG FIX: DataFrame.append() was deprecated and removed in pandas 2.0.
    # Concatenate a one-row frame holding the per-column means instead.
    data_np = pd.concat(
        [data_np, data_np_lack.mean(axis=0).to_frame().T], ignore_index=True)
    # Transpose so each statistic becomes a row and each run a column.
    data_np = np.transpose(data_np)
    data_np_list = np.array(data_np).tolist()

    writer = MarkdownTableWriter()
    writer.table_name = "collecting data after survival training"
    writer.headers = [" "] + \
        [str(i + 1) for i in range(repeat_times)] + ["average"]
    writer.value_matrix = data_np_list
    table_output = writer.dumps()

    logger = logging.getLogger('no_interrupt')
    no_fire_stat = 'no_fires status: ' + \
        str(nf_global_survival_training) + '\n'
    enemy_freq_stat = 'enemy freq status: ' + \
        str(enemy_freq_sur_train) + '\n'
    ot_str = no_fire_stat + enemy_freq_stat + \
        'After survival training: \n' + table_output
    logger.info(ot_str)
示例#5
0
def print_starred_info(starred_info_set, repo_depth_map, verbosity):
    """Page a Markdown table summarizing star status for each package.

    Args:
        starred_info_set: sortable collection of per-package star info records.
        repo_depth_map: maps lower-cased package name to dependency depth.
        verbosity: None or int; >=1 adds the Depth column header, >=2 the URL.
    """
    records = []
    for info in sorted(starred_info_set):
        # Row layout: package, repo, status, owner, depth, url.
        record = [
            info.pypi_pkg_name,
            info.github_repo_id,
            _star_status_map[info.star_status],
            # Ownership is only meaningful when the star status is known.
            info.is_owned if info.star_status
            in [StarStatus.STARRED, StarStatus.NOT_STARRED] else _NA,
            repo_depth_map[info.pypi_pkg_name.lower()],
            info.url,
        ]
        records.append(record)

    writer = MarkdownTableWriter()
    writer.headers = ["Package", "Repository", "Starred", "Owner"]
    if verbosity is not None:
        if verbosity >= 1:
            writer.headers += ["Depth"]

        if verbosity >= 2:
            writer.headers += ["URL"]

    # NOTE(review): rows always carry 6 cells while headers list 4-6 depending
    # on verbosity — presumably pytablewriter ignores the extra cells; confirm.
    writer.value_matrix = sorted(records, key=itemgetter(4,
                                                         0))  # sorted by depth
    writer.margin = 1
    # Render booleans (e.g. Owner) as checkmarks; center the status columns.
    writer.register_trans_func(bool_to_checkmark)
    writer.set_style("Starred", Style(align="center"))
    writer.set_style("Owner", Style(align="center"))
    pager(writer.dumps())
示例#6
0
def generate_md_table(data, headers):
    """Return *data* rendered as a Markdown table whose every column is
    centered with bold headers."""
    bold_centered = Style(align="center", font_weight="bold")
    writer = MarkdownTableWriter()
    writer.headers = headers
    writer.column_styles = [bold_centered for _ in headers]
    writer.value_matrix = data
    return writer.dumps()
示例#7
0
def intent_table():
    """Render the intent cross-validation report as a Markdown table.

    Reads results/intent_report.json, drops the aggregate 'accuracy' entry,
    and returns one row per intent class sorted by support (descending).
    """
    writer = MarkdownTableWriter()
    writer.table_name = "Intent Cross-Validation Results (5 folds)"

    with open('results/intent_report.json', 'r') as f:
        # json.load reads the stream directly; no need for loads(f.read()).
        data = json.load(f)

    cols = ["support", "f1-score", "confused_with"]
    writer.headers = ["class"] + cols

    classes = list(data.keys())
    classes.remove('accuracy')
    classes.sort(key=lambda x: data[x]['support'], reverse=True)

    def format_cell(data, c, k):
        # Missing or falsy cells render as "N/A".
        if not data[c].get(k):
            return "N/A"
        if k == "confused_with":
            # BUG FIX: the original comprehension reused `k` as its loop
            # variable, shadowing the column key being formatted; use
            # distinct names for the confused intent and its count.
            return ", ".join(
                f"{intent}({count})" for intent, count in data[c][k].items())
        else:
            return data[c][k]

    writer.value_matrix = [[c] + [format_cell(data, c, k) for k in cols]
                           for c in classes]

    return writer.dumps()
示例#8
0
def entity_table():
    """Render the entity cross-validation report as a Markdown table,
    one row per entity sorted by support (descending)."""

    with open('results/DIETClassifier_report.json', 'r') as f:
        report = json.load(f)

    columns = ["support", "f1-score", "precision", "recall"]

    # Most-supported entities first.
    entities = sorted(report, key=lambda name: report[name]['support'],
                      reverse=True)

    def cell(name, column):
        # Missing or falsy values render as "N/A".
        value = report[name].get(column)
        return value if value else "N/A"

    writer = MarkdownTableWriter()
    writer.table_name = "Entity Cross-Validation Results (5 folds)"
    writer.headers = ["entity"] + columns
    writer.value_matrix = [[name] + [cell(name, col) for col in columns]
                           for name in entities]

    return writer.dumps()
def make_table(result_dict):
    """Generate table of results.

    Builds one row per (task, metric) pair, showing the value to four decimal
    places with "± stderr" when a matching *_stderr entry exists, and returns
    the Markdown rendering.
    """
    from pytablewriter import MarkdownTableWriter, LatexTableWriter

    md_writer = MarkdownTableWriter()
    latex_writer = LatexTableWriter()
    md_writer.headers = ["Task", "Version", "Metric", "Value", "", "Stderr"]
    latex_writer.headers = ["Task", "Version", "Metric", "Value", "", "Stderr"]

    rows = []
    for task, metrics in result_dict["results"].items():
        # Show the task name/version only on the task's first row.
        task_cell = task
        version_cell = result_dict["versions"][task]
        for metric, value in metrics.items():
            if metric.endswith("_stderr"):
                continue

            stderr_key = metric + "_stderr"
            if stderr_key in metrics:
                rows.append([task_cell, version_cell, metric, "%.4f" % value,
                             "±", "%.4f" % metrics[stderr_key]])
            else:
                rows.append([task_cell, version_cell, metric,
                             "%.4f" % value, "", ""])
            task_cell = ""
            version_cell = ""
    md_writer.value_matrix = rows
    latex_writer.value_matrix = rows

    # todo: make latex table look good
    # print(latex_writer.dumps())

    return md_writer.dumps()
示例#10
0
    def _repr_markdown_(self):
        """Render this (super)group as a Markdown table for rich display."""
        has_subgroups = self.__contains_groups()

        rows = list(self._row_values(has_subgroups))
        if has_subgroups:
            # Supergroups get a blank spacer row followed by an emphasized
            # total row.
            total = str(self.total())
            rows.append(["", "", "", ""])
            rows.append(["", f"**_Total {self.name}_**", f"**_{total}_**", ""])

        table = MarkdownTableWriter(
            headers=[
                "Pos.",
                "Bezeichnung",
                "Betrag",
                "Anmerkung",
            ],
            column_styles=[
                TableStyle(align="right"),
                TableStyle(align="left"),
                TableStyle(align="right"),
                TableStyle(align="left"),
            ],
            value_matrix=rows,
            margin=1,
        )
        return table.dumps()
示例#11
0
    def _generate_docs_for_raw_config(
        raw_file_config: DirectIngestRawFileConfig, ) -> str:
        """Generates documentation for the given raw file config and returns it as a string."""
        # Upper-cased for case-insensitive primary-key membership checks.
        primary_keys = {
            col.upper() for col in raw_file_config.primary_key_cols
        }

        docs = (
            f"## {raw_file_config.file_tag}\n\n"
            f"{raw_file_config.file_description}\n\n"
        )

        # One row per column, alphabetized by column name.
        rows = [
            [
                column.name,
                column.description or "<No documentation>",
                "YES" if column.name.upper() in primary_keys else "",
            ]
            for column in sorted(raw_file_config.columns,
                                 key=lambda col: col.name)
        ]
        writer = MarkdownTableWriter(
            headers=["Column", "Column Description", "Part of Primary Key?"],
            value_matrix=rows,
            margin=1,
        )
        return docs + writer.dumps()
示例#12
0
    def _generate_raw_file_table(
        self,
        config_paths_by_file_tag: Dict[str, str],
        file_tags_with_raw_file_configs: List[str],
        views_by_raw_file: Dict[str, List[str]],
    ) -> str:
        """Generates a Markdown-formatted table of contents to be included in a raw file specification."""

        def _tag_cell(file_tag: str) -> str:
            # Tags with a raw-file config link to their anchor in the doc.
            if file_tag in file_tags_with_raw_file_configs:
                return f"[{file_tag}](#{file_tag})"
            return f"{file_tag}"

        rows = []
        for file_tag in sorted(config_paths_by_file_tag):
            config_path = config_paths_by_file_tag[file_tag]
            rows.append([
                _tag_cell(file_tag),
                ",<br />".join(views_by_raw_file[file_tag]),
                self._get_last_updated(config_path),
                self._get_updated_by(config_path),
            ])

        writer = MarkdownTableWriter(
            headers=[
                "**Table**",
                "**Referencing Views**",
                "**Last Updated**",
                "**Updated By**",
            ],
            value_matrix=rows,
            margin=1,
        )

        return writer.dumps()
示例#13
0
def create_section(section_list, name):
    """Build one Markdown section titled *name* from a list of result rows.

    Returns a tuple of (section markdown, number of table rows).
    """
    headers = [
        'Task', 'Dataset', 'SOTA', 'SOTA Acc', 'Our Acc', '📝', 'Notebook'
    ]

    def _link(text, url):
        # Render "[text](url)" when a URL exists, otherwise plain text.
        return '[{}]({})'.format(text, url) if url else text

    rows = []
    for row in section_list:
        colab_badge = (
            '[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)]'
            '(https://colab.research.google.com/github/eugenesiow/practical-ml/blob/master/notebooks/{}'
            ' "Open in Colab")'.format(row[10]))
        rows.append([
            row[0],
            _link(row[1], row[2]),
            _link(row[3], row[4]),
            row[6],
            row[7],
            '[📝]({} "Article")'.format(row[9]),
            colab_badge,
        ])

    writer = MarkdownTableWriter(
        headers=headers,
        value_matrix=rows,
    )
    section_str = '## [↑](#-table-of-contents) {}\n\n'.format(name)
    section_str += writer.dumps()
    section_str += '\n\n'
    return section_str, len(rows)
示例#14
0
def mk_table(datasets):
    """Summarize (dataset, weight) pairs as a Markdown table: size,
    relative weight, epochs at 1.2TB of training, and mean document size,
    with a total row at the bottom."""
    train_chars = 1.2e12

    # Effective (weighted) total size across all components.
    total_weight = sum(weight * dataset.size() for dataset, weight in datasets)

    rows = []
    for dataset, weight in datasets:
        size = dataset.size()
        relative_weight = size * weight / total_weight
        rows.append([
            dataset.name(),
            size,
            '{:.2%}'.format(relative_weight),
            train_chars / size * relative_weight,
            humanbytes(size / dataset.num_docs()),
        ])

    # Largest components first, then append the grand-total row.
    rows.sort(key=lambda row: row[1], reverse=True)
    total_size = sum(row[1] for row in rows)
    total_docs = sum(dataset.num_docs() for dataset, _ in datasets)
    rows.append([
        '**Total**', total_size, "", "", humanbytes(total_size / total_docs)
    ])
    # Humanize the raw byte counts only after totals are computed.
    rows = [[row[0], humanbytes(row[1]), row[2], row[3], row[4]]
            for row in rows]

    writer = MarkdownTableWriter()
    writer.table_name = "The Pile™"
    writer.headers = [
        "Component", "Size", "Weight", "Epochs (@1.2TB)", "Mean Document Size"
    ]
    writer.value_matrix = rows
    return writer.dumps()
示例#15
0
 def overall_report(self):
     """Render every flattened container metric as a two-column
     metric/value Markdown table."""
     writer = MarkdownTableWriter()
     writer.title = 'overall'
     writer.headers = ['metric', 'value']
     writer.value_matrix = [
         [metric, value] for metric, value in self._container.flatten.items()
     ]
     return writer.dumps()
示例#16
0
def print_nodes_mark_down(node_type, nodes):
    """Print the given nodes as a Markdown table after filtering them by
    *node_type*."""
    writer = MarkdownTableWriter()
    writer.headers = [
        'ID', 'Link to GitHub', "Created date", "Status", 'Title',
        'Assigned to'
    ]
    writer.value_matrix = filter_nodes_data(node_type, nodes)
    print(writer.dumps())
示例#17
0
    def _generate_docs_for_raw_config(
        raw_file_config: DirectIngestRawFileConfig,
    ) -> str:
        """Generates documentation for the given raw file config and returns it as a string."""
        # Upper-cased for case-insensitive primary-key membership checks.
        primary_keys = {col.upper() for col in raw_file_config.primary_key_cols}

        def _primary_key_marker(column_name: str) -> str:
            return "YES" if column_name.upper() in primary_keys else ""

        def _enum_cell(known_values: Optional[List[ColumnEnumValueInfo]]) -> str:
            # None means the column is not an enum; an empty list means the
            # enum's values are undocumented.
            if known_values is None:
                return "N/A"
            if not known_values:
                return "<No documentation>"
            return ", <br/>".join(
                f"<b>{enum.value}: </b> {enum.description if enum.description else 'Unknown'}"
                for enum in known_values
            )

        header = (
            f"## {raw_file_config.file_tag}\n\n{raw_file_config.file_description}\n\n"
        )

        rows = [
            [
                column.name,
                column.description or "<No documentation>",
                _primary_key_marker(column.name),
                _enum_cell(column.known_values),
            ]
            for column in raw_file_config.columns
        ]
        writer = MarkdownTableWriter(
            headers=[
                "Column",
                "Column Description",
                "Part of Primary Key?",
                "Distinct Values",
            ],
            value_matrix=rows,
            # Margin values other than 0 have nondeterministic spacing. Do not change.
            margin=0,
        )
        return header + writer.dumps()
示例#18
0
    def __create_table(self, platform: str, username: str,
                       changed_datas: Dict[str, Union[str, int]]) -> str:
        """Build a Markdown table of metrics for one platform account.

        Args:
            platform: platform name, shown title-cased in the table name.
            username: account name shown in the table name.
            changed_datas: per-metric dicts with "current", "last" and an
                optional "change" entry.

        Returns:
            The rendered Markdown table string.
        """
        table = MarkdownTableWriter()
        table.table_name = f"{platform.title()}: {username}"
        table.headers = ["Metric", "Value", "Change"]
        # BUG FIX (readability): the original expression carried a dead
        # "n/a" default — `.get("change", "n/a")` was only evaluated when
        # `value.get("change")` was truthy, so the default could never be
        # returned. Spell the intent out directly: show the change when one
        # exists, otherwise fall back to the last value.
        table.value_matrix = [[
            key,
            value["current"],
            value["change"] if value.get("change") else value["last"],
        ] for key, value in changed_datas.items()]

        table.margin = 1

        return table.dumps()
示例#19
0
def main():
    """Evaluate the model on the task once per custom few-shot description
    and print the results as JSON plus a Markdown summary table."""
    # Fix seeds for reproducible sampling.
    random.seed(42)
    np.random.seed(42)

    # NOTE(review): `model`, `model_args`, `limit`, `no_cache`,
    # `fewshot_descriptions`, `task` and `num_fewshot` are not defined in this
    # function — presumably module-level globals or injected by a CLI
    # framework; confirm before reuse.
    lm = models.get_model(model).create_from_arg_string(model_args)

    if limit:
        print(
            "WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
        )

    if not no_cache:
        # Wrap the model so repeated requests hit an on-disk cache keyed by
        # model name and args.
        lm = base.CachingLM(
            lm, 'lm_cache/' + model + '_' +
            model_args.replace('=', '-').replace(',', '_') + '.db')

    task_dict = tasks.get_task_dict([task])

    for desc in fewshot_descriptions:
        # Re-wrap every task with the custom description under test.
        custom_task_dict = {
            k: CustomDescTask(v, desc)
            for k, v in task_dict.items()
        }

        results = evaluator.evaluate(lm, custom_task_dict, True, num_fewshot,
                                     limit)

        dumped = json.dumps(results, indent=2)

        print('Description:', desc)
        print(dumped)

        # MAKE TABLE
        from pytablewriter import MarkdownTableWriter

        writer = MarkdownTableWriter()
        writer.headers = ["Task", "Metric", "Value"]

        values = []

        # One row per metric; blank the task name after its first row.
        for k, dic in results.items():
            for m, v in dic.items():
                values.append([k, m, '%.4f' % v])
                k = ""
        writer.value_matrix = values

        print(writer.dumps())
示例#20
0
def main():
    """Parse CLI args, evaluate the chosen model on the requested tasks,
    dump the results as JSON (optionally to a file) and print a Markdown
    summary table."""

    args = parse_args()
    # Fix seeds for reproducible sampling.
    random.seed(args.seed)
    np.random.seed(args.seed)

    lm = models.get_model(args.model).create_from_arg_string(args.model_args)

    if args.limit:
        print(
            "WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
        )

    if not args.no_cache:
        # Wrap the model so repeated requests hit an on-disk cache keyed by
        # model name and args.
        lm = base.CachingLM(
            lm, 'lm_cache/' + args.model + '_' +
            args.model_args.replace('=', '-').replace(',', '_') + '.db')
    if args.tasks == "all_tasks":
        task_names = tasks.ALL_TASKS
    else:
        task_names = args.tasks.split(",")
    task_dict = tasks.get_task_dict(task_names)

    results = evaluator.evaluate(lm, task_dict, args.provide_description,
                                 args.num_fewshot, args.limit)

    dumped = json.dumps(results, indent=2)
    print(dumped)
    if args.output_path:
        with open(args.output_path, "w") as f:
            f.write(dumped)

    # MAKE TABLE
    from pytablewriter import MarkdownTableWriter

    writer = MarkdownTableWriter()
    writer.headers = ["Task", "Metric", "Value"]

    values = []

    # One row per metric; blank the task name after its first row.
    for k, dic in results.items():
        for m, v in dic.items():
            values.append([k, m, '%.4f' % v])
            k = ""
    writer.value_matrix = values

    print(writer.dumps())
示例#21
0
def params_to_markdown(params_str):
    """ this converts the hyper-params into MarkDown text and we put it on Tensorboard subsequently """

    params = process_text(text=params_str)

    writer = MarkdownTableWriter()
    writer.table_name = "Hyper-parameters"
    writer.headers = ["Item", "Value"]
    # Stringify both sides so mixed-type values render uniformly.
    writer.value_matrix = [
        [str(key), str(value)] for key, value in params.items()
    ]
    writer.margin = 1  # one space of padding on each side of every cell

    return writer.dumps()
示例#22
0
def get_table_string(authors, dates, titles, topics):
    """
    Returns the rendered "Completed Articles" Markdown table, one row per
    article (the four sequences are zipped positionally).
    :param authors: article author names
    :param dates: publication years (shown in the "Year" column)
    :param titles: article titles
    :param topics: per-article iterables of topic tags, comma-joined
    :return: the Markdown table as a string
    """
    writer = MarkdownTableWriter(
        table_name="Completed Articles",
        headers=["Author", "Title", "Year", "Topics"],
        value_matrix=[[a, tit, d, ", ".join(top)]
                      for a, d, tit, top in zip(authors, dates, titles, topics)
                      ],
        margin=1,  # add a whitespace for both sides of each cell
    )
    return writer.dumps()
示例#23
0
    def markdown_table(self, *columns):
        """Export a markdown-formatted version of the table

        Parameters
        ----------
        *columns
            Names of columns to include in the exported table; when omitted,
            every column is exported.
        """

        writer = MarkdownTableWriter()
        # BUG FIX: `header_list` is the long-deprecated (and since removed)
        # alias of `headers` in pytablewriter; use the current attribute name.
        writer.headers = ['Name'] + ([self.header[col] for col in columns]
                                     if columns else self.header)
        # One row per entry, sorted by name for deterministic output.
        writer.value_matrix = [
            [name] + ([self.dict[name][col]
                       for col in columns] if columns else self.dict[name])
            for name in sorted(self.dict.keys())
        ]
        return writer.dumps()
def main():
    """Dry-run every task in `task_list` to count prompt tokens, then print
    a per-task cost table for the OpenAI Ada/Babbage/Curie/Davinci tiers."""
    lm = DryrunLM()

    task_list = "arc_challenge,arc_easy,boolq,cola,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,record,rte,sciq,sst,triviaqa,webqs,wic,wikitext,winogrande,wnli,wsc"
    values = []
    for taskname in task_list.split(","):
        # The dry-run LM accumulates token counts; reset before each task.
        lm.tokencost = 0
        evaluator.evaluate(
            lm=lm,
            task_dict={taskname: tasks.get_task(taskname)()},
            num_fewshot=0,
            limit=None,
            bootstrap_iters=10,
            description_dict=None,
        )

        print(taskname, lm.tokencost)
        # Cost columns: tokens / 1000 * per-1K-token price for each engine.
        values.append([
            taskname,
            lm.tokencost,
            lm.tokencost / 1000 * 0.0008,
            lm.tokencost / 1000 * 0.0012,
            lm.tokencost / 1000 * 0.006,
            lm.tokencost / 1000 * 0.06,
        ])
    from pytablewriter import MarkdownTableWriter

    writer = MarkdownTableWriter()
    writer.headers = ["Task", "Tokens", "Ada", "Babbage", "Curie", "Davinci"]

    # Most expensive tasks first, then a grand-total row at the bottom.
    values.sort(key=lambda x: -x[1])
    totcost = sum([x[1] for x in values])
    values.append([
        "**Total**",
        totcost,
        totcost / 1000 * 0.0008,
        totcost / 1000 * 0.0012,
        totcost / 1000 * 0.006,
        totcost / 1000 * 0.06,
    ])

    writer.value_matrix = values

    print(writer.dumps())
示例#25
0
def mk_table(datasets, train_chars, print_latex=False):
    """Summarize Pile component datasets (raw size, sampling weight, epochs,
    effective size, mean document size) as a Markdown table; optionally print
    a LaTeX rendition of the same rows."""
    values = []

    # Total effective (weighted) size across all components.
    total_weight = sum([x[1] * x[0].size() for x in datasets])

    for dataset, weight in datasets:
        size = dataset.size()
        relative_weight = size * weight / total_weight
        values.append([dataset.name(), size, '{:.2%}'.format(relative_weight), '{:.4f}'.format(train_chars / size * relative_weight), size * weight, humanbytes(size / dataset.num_docs(), 'KiB')])

    # Largest effective size first, then a grand-total row.
    values.sort(key=lambda x: -x[4])
    values.append(['**Total**', "", "", "", sum([x[4] for x in values]), humanbytes(sum([x[1] for x in values]) / sum(x[0].num_docs() for x in datasets), 'KiB')])
    # Humanize byte counts only after the totals are computed.
    values = [[x[0], humanbytes(x[1], 'GiB') if x[1] else "", x[2], x[3], humanbytes(x[4], 'GiB'), x[5]] for x in values]

    writer = MarkdownTableWriter()
    writer.table_name = "The Pile™"
    writer.headers = ["Component", "Raw Size", "Weight", "Epochs", "Effective Size", "Mean Document Size"]
    writer.value_matrix = values

    if print_latex:
        # Escape % for LaTeX and emit the same rows inside a table* template.
        rows = []
        for row in values[:-1]:
            rows.append("        " + " & ".join(map(lambda x: str(x).replace('%', r'\%'), row)) + r" \\")
        totalrow = " & ".join(map(lambda x: r'\textbf{%s}' % str(x).replace('%', r'\%') if x else "", values[-1][1:])) + r" \\"
        latex = r"""
\begin{table*}[t!]
    \centering
    \begin{tabular}{l r r r r r}
    \toprule
        \textbf{Component} & \textbf{Raw Size} & \textbf{Weight} & \textbf{Copies} & \textbf{Effective Size} & \textbf{Mean Document Size} \\
        \midrule
""" + "\n".join(rows) + r"""
        \midrule
        \textbf{The Pile} & """ + totalrow + r"""
        \bottomrule
    \end{tabular}
\caption{Overview of datasets in \textit{The Pile} before deduplication. The Pile is distributed with a predefined up/down-sampling of the different constituent datasets.}
\label{table:pile_overview}
\end{table*}
        """
        print(latex)
    return writer.dumps()
示例#26
0
def generate_markdown_table(input_params_yaml, output_markdown_file_path):
    """Render the `parameters` section of a YAML file as a Markdown table.

    Writes the table to *output_markdown_file_path* when given, otherwise
    prints it to stdout. Returns 0 on success and 1 on any failure.
    """
    try:
        with open(input_params_yaml, "r") as input_file:
            parameters = yaml.safe_load(input_file)["parameters"]
            parameter_fields = ["name", "description", "default"]

            writer = MarkdownTableWriter()
            writer.table_name = "Parameters"
            # BUG FIX: per-column styling must be assigned to
            # `column_styles`; assigning to `styles` silently did nothing.
            writer.column_styles = [
                Style(align="left", font_weight="bold"),
                Style(align="left"),
                Style(align="left"),
            ]
            writer.headers = [field.capitalize() for field in parameter_fields]
            writer.margin = 1

            writer.value_matrix = [
                [parameter.get(field) for field in parameter_fields]
                for parameter in parameters
            ]

            if output_markdown_file_path:
                try:
                    with open(output_markdown_file_path, "w") as output_file:
                        writer.stream = output_file
                        writer.write_table()
                except Exception:
                    # BUG FIX: logging.error(msg, e) treated the exception as
                    # a %-format argument with no placeholder; use
                    # logging.exception so the traceback is recorded.
                    logging.exception(
                        f"Failed to output Markdown to {output_markdown_file_path}"
                    )
                    return 1
            else:
                print(writer.dumps())

            return 0
    except Exception:
        logging.exception(
            f"Failed to generate Markdown from {input_params_yaml}"
        )
        return 1
    def _get_metric_information(self, metric: Type[RecidivizMetric]) -> str:
        """Returns string contents for a metric markdown."""
        metric_table_id = DATAFLOW_METRICS_TO_TABLES[metric]
        metric_type = DATAFLOW_TABLES_TO_METRIC_TYPES[metric_table_id].value

        # One row per state that calculates this metric, ordered by state
        # name then month count.
        state_infos_list = sorted(
            self.state_metric_calculations_by_metric[metric_type],
            key=lambda info: (info.name, info.month_count),
        )
        headers = [
            "**State**",
            "**Number of Months Calculated**",
            "**Calculation Frequency**",
        ]
        # Each state name links to that state's markdown page.
        table_matrix = [[
            f"[{state_info.name}](../../states/{self._normalize_string_for_path(state_info.name)}.md)",
            state_info.month_count if state_info.month_count else "N/A",
            state_info.frequency,
        ] for state_info in state_infos_list]
        writer = MarkdownTableWriter(headers=headers,
                                     value_matrix=table_matrix,
                                     margin=0)

        # Fill the docs template with BigQuery links for both environments
        # plus the calculation-cadence table rendered above.
        documentation = METRIC_DOCS_TEMPLATE.format(
            staging_link=BQ_LINK_TEMPLATE.format(
                project="recidiviz-staging",
                dataset_id="dataflow_metrics",
                table_id=metric_table_id,
            ),
            prod_link=BQ_LINK_TEMPLATE.format(
                project="recidiviz-123",
                dataset_id="dataflow_metrics",
                table_id=metric_table_id,
            ),
            metric_name=metric.__name__,
            description=metric.get_description(),
            metrics_cadence_table=writer.dumps(),
            metric_table_id=metric_table_id,
        )

        return documentation
 def _get_metrics_table_for_state(self, state_name: str) -> str:
     """Return a Markdown table of the metrics this state regularly
     calculates, or a placeholder sentence when there are none."""
     sorted_state_metric_calculations = self._get_sorted_state_metric_info()
     # Invert DATAFLOW_TABLES_TO_METRIC_TYPES so metric names map back to
     # their table ids (used to build per-metric doc links below).
     metric_names_to_tables = {
         metric.value: table
         for table, metric in DATAFLOW_TABLES_TO_METRIC_TYPES.items()
     }
     if state_name in sorted_state_metric_calculations:
         headers = [
             "**Metric**",
             "**Number of Months Calculated**",
             "**Calculation Frequency**",
         ]
         # Each metric name links to its markdown page under ../metrics/.
         table_matrix = [[
             f"[{metric_info.name}](../metrics/{self.generic_types_by_metric_name[metric_names_to_tables[metric_info.name]].lower()}/{metric_names_to_tables[metric_info.name]}.md)",
             metric_info.month_count if metric_info.month_count else "N/A",
             metric_info.frequency,
         ] for metric_info in sorted_state_metric_calculations[state_name]]
         writer = MarkdownTableWriter(headers=headers,
                                      value_matrix=table_matrix,
                                      margin=0)
         return writer.dumps()
     return "_This state has no regularly calculated metrics._"
示例#29
0
def pipeline_visualizer(num_stages, num_microbatches, include_all=False):
    """Render a pipeline-parallel training schedule as a Markdown table
    (one row per GPU, one column per step) plus idle/busy step counts."""
    stages = {}
    for stage_id in range(num_stages):
        # NOTE(review): TrainSchedule is constructed with
        # stages=num_stages - 1 while the loop runs stage_id over
        # range(num_stages) — confirm this off-by-one is intentional for the
        # schedule implementation.
        steps = [i for i in TrainSchedule(micro_batches=num_microbatches, stages=num_stages - 1 ,
                                          stage_id=stage_id).steps()]
        steps = expand(steps, include_all=include_all)
        stages[stage_id] = steps
    value_matrix = [v for k, v in stages.items()]
    headers = ['GPU ID'] + [str(i) for i in range(len(stages[0]))]
    # Prefix each row with its GPU label.
    value_matrix = [[f'GPU {i}'] + value_matrix[i] for i in range(len(value_matrix))]
    writer = MarkdownTableWriter(
        table_name=f"Pipe Schedule\n",
        headers=headers,
        value_matrix=value_matrix
    )
    string = writer.dumps()
    # Idle slots are None; the 'GPU i' label cells are excluded from the
    # busy count via the 'GPU' substring check.
    all_steps = flatten(value_matrix)
    idle_time = len([i for i in all_steps if i is None])
    print(all_steps)
    non_idle_time = len([i for i in all_steps if (i is not None and 'GPU' not in i)])
    string += f'\nNum Devices: {num_stages}\nNum Microbatches: {num_microbatches} \n' \
              f'Idle Time: {idle_time}\nNon Idle Time: {non_idle_time}'
    return string
示例#30
0
def main():
    """Dry-run every registered task to count prompt tokens and print a
    per-task Davinci cost table, sorted by cost with a total row."""
    lm = DryrunLM()

    values = []
    for taskname in list(tasks.TASK_REGISTRY.keys()):
        # The dry-run LM accumulates token counts; reset before each task.
        lm.tokencost = 0
        evaluator.evaluate(lm, {taskname: tasks.get_task(taskname)()}, False,
                           0, None)

        print(taskname, lm.tokencost)
        # $0.06 per 1K tokens is the Davinci price used for the cost column.
        values.append([taskname, lm.tokencost, lm.tokencost / 1000 * 0.06])
    from pytablewriter import MarkdownTableWriter

    writer = MarkdownTableWriter()
    writer.headers = ["Task", "Tokens", "Davinci Cost"]

    # Most expensive tasks first, then a grand-total row at the bottom.
    values.sort(key=lambda x: -x[1])
    totcost = sum([x[1] for x in values])
    values.append(["**Total**", totcost, totcost / 1000 * 0.06])

    writer.value_matrix = values

    print(writer.dumps())