Пример #1
0
def run(
    input_file: KGTKFiles,
    prop_file: KGTKFiles,
    labels: str,
    aliases: str,
    descriptions: str,
    prop_declaration: bool,
    output_prefix: str,
    n: int,
    log_path: str,
    warning: bool,
    has_rank: bool,
    error_action: str,
    property_declaration_label: str,
    ignore_property_declarations_in_file: bool,
    filter_prop_file: bool,
    verbose: bool,
):
    """Resolve the file arguments, build a ``JsonGenerator``, and run it.

    ``input_file`` is resolved to a required path and ``prop_file`` to an
    optional path through ``KGTKArgumentParser``.  Every other argument is
    handed to the ``JsonGenerator`` constructor unchanged (``labels``,
    ``aliases`` and ``descriptions`` are passed as ``label_set``,
    ``alias_set`` and ``description_set``), after which ``process()`` is
    called on the generator.

    Nothing is returned explicitly, and exceptions raised by the generator
    propagate to the caller unmodified.
    """
    # import modules locally
    from pathlib import Path
    from kgtk.generator import JsonGenerator
    import gzip
    import sys
    from kgtk.exceptions import KGTKException

    # NOTE(review): gzip, sys and KGTKException are imported above but are
    # not referenced anywhere in this function.

    source_path: Path = KGTKArgumentParser.get_input_file(input_file)
    property_path: typing.Optional[Path] = \
        KGTKArgumentParser.get_optional_input_file(prop_file,
                                                   who="KGTK prop file")

    # Collect the constructor arguments first so the mapping from CLI names
    # to JsonGenerator keyword names is visible in one place.
    generator_settings = {
        "input_file": source_path,
        "prop_file": property_path,
        "label_set": labels,
        "alias_set": aliases,
        "description_set": descriptions,
        "output_prefix": output_prefix,
        "n": n,
        "log_path": log_path,
        "warning": warning,
        "prop_declaration": prop_declaration,
        "has_rank": has_rank,
        "error_action": error_action,
        "property_declaration_label": property_declaration_label,
        "ignore_property_declarations_in_file":
            ignore_property_declarations_in_file,
        "filter_prop_file": filter_prop_file,
        "verbose": verbose,
    }

    JsonGenerator(**generator_settings).process()
Пример #2
0
def run(input_file: KGTKFiles,
        output_file: KGTKFiles,
        label_file: KGTKFiles,
        unmodified_row_file: KGTKFiles,
        matched_label_file: KGTKFiles,
        unmatched_label_file: KGTKFiles,

        input_select_column_name: typing.Optional[str],
        input_select_column_value: typing.Optional[str],
        input_lifting_column_names: typing.List[str],

        output_lifted_column_names: typing.List[str],
        output_lifted_column_suffix: str,
        output_select_column_value: str,

        label_select_column_name: typing.Optional[str],
        label_select_column_value: str,
        label_match_column_name: typing.Optional[str],
        label_value_column_name: typing.Optional[str],

        default_value: str,

        remove_label_records: bool = False,
        sort_lifted_labels: bool = True,
        suppress_duplicate_labels: bool = True,
        suppress_empty_columns: bool = False,
        ok_if_no_labels: bool = False,
        prefilter_labels: bool = False,
        input_is_presorted: bool = False,
        labels_are_presorted: bool = False,

        clear_before_lift: bool = False,
        overwrite: bool = False,

        output_only_modified_rows: bool = False,

        languages: typing.Optional[typing.List[str]] = None,
        prioritize: bool = False,

        use_label_envar: bool = False,
        lift_all_columns: bool = False,
        require_label_file: bool = False,
        force_input_mode_none: bool = False,

        errors_to_stdout: bool = False,
        errors_to_stderr: bool = True,
        show_options: bool = False,
        verbose: bool = False,
        very_verbose: bool = False,

        **kwargs  # Whatever KgtkFileOptions and KgtkValueOptions want.
) -> int:
    """Resolve the arguments, build a ``KgtkLift``, and run it.

    The six file arguments are resolved to paths through
    ``KGTKArgumentParser``; reader and value option objects are built from
    ``kwargs``; the remaining arguments are forwarded unchanged to the
    ``KgtkLift`` constructor, and ``process()`` is then called on it.

    Args:
        input_file, output_file: Required input and output files.
        label_file: Optional label input file.
        unmodified_row_file, matched_label_file, unmatched_label_file:
            Optional output files.
        input_select_column_name ... force_input_mode_none: Forwarded to
            ``KgtkLift`` under the same keyword names, except for the two
            arguments described next.
        use_label_envar: When true and no label file was given, the
            ``KGTK_LABEL_FILE`` environment variable is consulted for one.
        require_label_file: When true, fail if no label file is known after
            the environment lookup.
        errors_to_stdout: Send diagnostic output to stdout instead of stderr.
        errors_to_stderr: Accepted for interface compatibility; this function
            does not read it.
        show_options: Print the effective options to the diagnostic stream
            before processing.
        verbose: Forwarded to ``KgtkLift``; also reports when the label file
            is taken from the environment variable.
        very_verbose: Forwarded to ``KgtkLift``.
        **kwargs: Source dictionary for the reader and value option objects.

    Returns:
        0 once ``process()`` completes without raising.

    Raises:
        KGTKException: If a label file is required but missing, or wrapping
            any ``SystemExit`` or ``Exception`` raised while building or
            running ``KgtkLift`` (the original is attached as ``__cause__``).
    """
    # import modules locally
    import os
    from pathlib import Path
    import sys

    from kgtk.exceptions import KGTKException
    from kgtk.lift.kgtklift import KgtkLift
    from kgtk.io.kgtkreader import KgtkReaderOptions
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    input_kgtk_file: Path = KGTKArgumentParser.get_input_file(input_file)
    output_kgtk_file: Path = KGTKArgumentParser.get_output_file(output_file)
    label_kgtk_file: typing.Optional[Path] = KGTKArgumentParser.get_optional_input_file(label_file, who="KGTK label file")
    unmodified_row_kgtk_file: typing.Optional[Path] = KGTKArgumentParser.get_optional_output_file(unmodified_row_file, who="KGTK unmodified row output file")
    matched_label_kgtk_file: typing.Optional[Path] = KGTKArgumentParser.get_optional_output_file(matched_label_file, who="KGTK matched label output file")
    unmatched_label_kgtk_file: typing.Optional[Path] = KGTKArgumentParser.get_optional_output_file(unmatched_label_file, who="KGTK unmatched label output file")

    # Select where to send error messages, defaulting to stderr.
    error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr

    def emit(text: str) -> None:
        """Write one line to the diagnostic stream and flush it."""
        print(text, file=error_file, flush=True)

    # Build the option structures.
    input_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="input", fallback=True)
    label_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="label", fallback=True)
    # NOTE(review): reader_options is not read anywhere below.  The call is
    # kept in case from_dict() validates kwargs -- confirm before removing.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs)
    value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

    # Show the final option structures for debugging and documentation.
    if show_options:
        emit("--input-file=%s" % str(input_kgtk_file))
        emit("--output-file=%s" % str(output_kgtk_file))

        # Optional settings are reported only when they were supplied.
        for option, setting in (
                ("--label-file", label_kgtk_file),
                ("--unmodified-row-output-file", unmodified_row_kgtk_file),
                ("--matched-label-output-file", matched_label_kgtk_file),
                ("--unmatched-label-output-file", unmatched_label_kgtk_file),
                ("--input-select-column", input_select_column_name),
                ("--input-select-value", input_select_column_value),
        ):
            if setting is not None:
                emit("%s=%s" % (option, setting))

        if input_lifting_column_names:
            emit("--columns-to-lift %s" % " ".join(input_lifting_column_names))
        if output_lifted_column_names:
            emit("--columns-to-write %s" % " ".join(output_lifted_column_names))

        emit("--lift-suffix=%s" % output_lifted_column_suffix)
        if output_select_column_value is not None:
            emit("--update-select-value=%s" % output_select_column_value)

        if label_select_column_name is not None:
            emit("--label-select-column=%s" % label_select_column_name)
        emit("--label-select-value=%s" % label_select_column_value)
        if label_match_column_name is not None:
            emit("--label-match-column=%s" % label_match_column_name)
        if label_value_column_name is not None:
            emit("--label-value-column=%s" % label_value_column_name)

        for option, setting in (
                ("--default-value", default_value),
                ("--remove-label-records", remove_label_records),
                ("--sort-lifted-labels", sort_lifted_labels),
                ("--suppress-duplicate-labels", suppress_duplicate_labels),
                ("--suppress-empty-columns", suppress_empty_columns),
                ("--ok-if-no-labels", ok_if_no_labels),
                ("--prefilter-labels", prefilter_labels),
                ("--input-file-is-presorted", input_is_presorted),
                ("--label-file-is-presorted", labels_are_presorted),
                ("--clear-before-lift", clear_before_lift),
                ("--overwrite", overwrite),
                ("--output-only-modified-rows", output_only_modified_rows),
        ):
            emit("%s=%s" % (option, repr(setting)))

        if languages is not None:
            emit("--languages %s" % " ".join(repr(lang) for lang in languages))

        for option, setting in (
                ("--prioritize", prioritize),
                ("--use-label-envar", use_label_envar),
                ("--lift-all-columns", lift_all_columns),
                ("--require-label-files", require_label_file),
                ("--force-input-mode-none", force_input_mode_none),
        ):
            emit("%s=%s" % (option, repr(setting)))

        input_reader_options.show(out=error_file, who="input")
        label_reader_options.show(out=error_file, who="label")
        value_options.show(out=error_file)
        emit("=======")

    # Should the following functionality be moved to KgtkLift?
    if label_kgtk_file is None and use_label_envar:
        label_file_envar: str = 'KGTK_LABEL_FILE'  # TODO: Move this to a common file.
        label_file_envar_value: typing.Optional[str] = os.getenv(label_file_envar)
        if label_file_envar_value is not None:
            label_kgtk_file = Path(label_file_envar_value)
            if verbose:
                emit("Using label file %s from envar %s" % (repr(label_file_envar_value), repr(label_file_envar)))

    if require_label_file and label_kgtk_file is None:
        raise KGTKException("A label file must be specified using --label-file or KGTK_LABEL_FILE")

    try:
        kl: KgtkLift = KgtkLift(
            input_file_path=input_kgtk_file,
            label_file_path=label_kgtk_file,
            output_file_path=output_kgtk_file,
            unmodified_row_file_path=unmodified_row_kgtk_file,
            matched_label_file_path=matched_label_kgtk_file,
            unmatched_label_file_path=unmatched_label_kgtk_file,

            input_select_column_name=input_select_column_name,
            input_select_column_value=input_select_column_value,
            input_lifting_column_names=input_lifting_column_names,

            output_lifted_column_suffix=output_lifted_column_suffix,
            output_select_column_value=output_select_column_value,
            output_lifted_column_names=output_lifted_column_names,

            label_select_column_name=label_select_column_name,
            label_select_column_value=label_select_column_value,
            label_match_column_name=label_match_column_name,
            label_value_column_name=label_value_column_name,

            default_value=default_value,

            remove_label_records=remove_label_records,
            sort_lifted_labels=sort_lifted_labels,
            suppress_duplicate_labels=suppress_duplicate_labels,
            suppress_empty_columns=suppress_empty_columns,
            ok_if_no_labels=ok_if_no_labels,
            prefilter_labels=prefilter_labels,
            input_is_presorted=input_is_presorted,
            labels_are_presorted=labels_are_presorted,

            clear_before_lift=clear_before_lift,
            overwrite=overwrite,

            output_only_modified_rows=output_only_modified_rows,

            languages=languages,
            prioritize=prioritize,

            lift_all_columns=lift_all_columns,
            force_input_mode_none=force_input_mode_none,

            input_reader_options=input_reader_options,
            label_reader_options=label_reader_options,
            value_options=value_options,
            error_file=error_file,
            verbose=verbose,
            very_verbose=very_verbose,
        )

        kl.process()

        return 0

    except SystemExit as e:
        # Chain explicitly so the original exit request stays on __cause__.
        raise KGTKException("Exit requested") from e
    except Exception as e:
        # Chain explicitly so the original traceback is reported as the cause.
        raise KGTKException(str(e)) from e
Пример #3
0
def run(
        input_file: KGTKFiles,
        output_file: KGTKFiles,
        reject_file: KGTKFiles,
        namespace_file: KGTKFiles,
        updated_namespace_file: KGTKFiles,
        namespace_id_prefix: str,
        namespace_id_use_uuid: bool,
        namespace_id_counter: int,
        namespace_id_zfill: int,
        output_only_used_namespaces: bool,
        allow_lax_uri: bool,
        local_namespace_prefix: str,
        local_namespace_use_uuid: bool,
        prefix_expansion_label: str,
        structured_value_label: str,
        structured_uri_label: str,
        newnode_prefix: str,
        newnode_use_uuid: bool,
        newnode_counter: int,
        newnode_zfill: int,
        build_id: bool,
        escape_pipes: bool,
        validate: bool,
        override_uuid: typing.Optional[str],
        errors_to_stdout: bool = False,
        errors_to_stderr: bool = True,
        show_options: bool = False,
        verbose: bool = False,
        very_verbose: bool = False,
        **kwargs  # Whatever KgtkFileOptions and KgtkValueOptions want.
) -> int:
    """Resolve the arguments, build a ``KgtkNtriples``, and run it.

    The file arguments are resolved to paths through ``KGTKArgumentParser``;
    ID-builder, reader and value option objects are built from ``kwargs``;
    the remaining arguments are forwarded unchanged to the ``KgtkNtriples``
    constructor, and ``process()`` is then called on it.

    Args:
        input_file: One or more input files (resolved to a list of paths).
        output_file: Required output file.
        reject_file, updated_namespace_file: Optional output files.
        namespace_file: Optional input file.
        namespace_id_prefix ... override_uuid: Forwarded to ``KgtkNtriples``
            under the same keyword names.
        errors_to_stdout: Send diagnostic output to stdout instead of stderr.
        errors_to_stderr: Accepted for interface compatibility; this function
            does not read it.
        show_options: Print the effective options to the diagnostic stream
            before processing.
        verbose, very_verbose: Forwarded to ``KgtkNtriples``.
        **kwargs: Source dictionary for the ID-builder, reader and value
            option objects.

    Returns:
        0 once ``process()`` completes without raising.

    Raises:
        KGTKException: Wrapping any ``SystemExit`` or ``Exception`` raised
            while building or running ``KgtkNtriples`` (the original is
            attached as ``__cause__``).
    """
    # import modules locally
    from pathlib import Path
    import sys

    from kgtk.exceptions import KGTKException
    from kgtk.imports.kgtkntriples import KgtkNtriples
    from kgtk.io.kgtkreader import KgtkReaderOptions
    from kgtk.reshape.kgtkidbuilder import KgtkIdBuilderOptions
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    # Resolve the command-line file arguments to paths.
    input_file_paths: typing.List[
        Path] = KGTKArgumentParser.get_input_file_list(input_file)
    output_kgtk_file: Path = KGTKArgumentParser.get_output_file(output_file)
    reject_file_path: typing.Optional[
        Path] = KGTKArgumentParser.get_optional_output_file(
            reject_file, who="KGTK reject file")

    namespace_kgtk_file: typing.Optional[
        Path] = KGTKArgumentParser.get_optional_input_file(
            namespace_file, who="KGTK namespace file")
    updated_namespace_kgtk_file: typing.Optional[
        Path] = KGTKArgumentParser.get_optional_output_file(
            updated_namespace_file, who="KGTK updated namespace file")

    # Select where to send error messages, defaulting to stderr.
    error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr

    def emit(text: str) -> None:
        """Write one line to the diagnostic stream and flush it."""
        print(text, file=error_file, flush=True)

    # Build the option structures.
    idbuilder_options: KgtkIdBuilderOptions = KgtkIdBuilderOptions.from_dict(
        kwargs)
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs)
    value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

    # Show the final option structures for debugging and documentation.
    if show_options:
        emit("--input-files %s" %
             " ".join([str(path) for path in input_file_paths]))
        emit("--output-file=%s" % str(output_kgtk_file))

        # Optional files are reported only when they were supplied.
        for option, path in (
                ("--reject-file", reject_file_path),
                ("--namespace-file", namespace_kgtk_file),
                ("--updated-namespace-file", updated_namespace_kgtk_file),
        ):
            if path is not None:
                emit("%s=%s" % (option, str(path)))

        # These options are echoed in "--name value" form.
        for option, setting in (
                ("--namespace-id-prefix", namespace_id_prefix),
                ("--namespace-id-use-uuid", namespace_id_use_uuid),
                ("--namespace-id-counter", namespace_id_counter),
                ("--namespace-id-zfill", namespace_id_zfill),
                ("--output-only-used-namespaces", output_only_used_namespaces),
                ("--allow-lax-uri", allow_lax_uri),
                ("--local-namespace-prefix", local_namespace_prefix),
                ("--local-namespace-use-uuid", local_namespace_use_uuid),
                ("--prefix-expansion-label", prefix_expansion_label),
                ("--structured-value-label", structured_value_label),
                ("--structured-uri-label", structured_uri_label),
                ("--newnode-prefix", newnode_prefix),
                ("--newnode-use-uuid", newnode_use_uuid),
                ("--newnode-counter", newnode_counter),
                ("--newnode-zfill", newnode_zfill),
        ):
            emit("%s %s" % (option, str(setting)))

        # These options are echoed in "--name=value" form.
        for option, setting in (
                ("--build-id", build_id),
                ("--escape-pipes", escape_pipes),
                ("--validate", validate),
                ("--override-uuid", override_uuid),
        ):
            emit("%s=%s" % (option, str(setting)))

        idbuilder_options.show(out=error_file)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)
        emit("=======")

    try:
        kn: KgtkNtriples = KgtkNtriples(
            input_file_paths=input_file_paths,
            output_file_path=output_kgtk_file,
            reject_file_path=reject_file_path,
            updated_namespace_file_path=updated_namespace_kgtk_file,
            namespace_file_path=namespace_kgtk_file,
            namespace_id_prefix=namespace_id_prefix,
            namespace_id_use_uuid=namespace_id_use_uuid,
            namespace_id_counter=namespace_id_counter,
            namespace_id_zfill=namespace_id_zfill,
            output_only_used_namespaces=output_only_used_namespaces,
            newnode_prefix=newnode_prefix,
            newnode_use_uuid=newnode_use_uuid,
            newnode_counter=newnode_counter,
            newnode_zfill=newnode_zfill,
            allow_lax_uri=allow_lax_uri,
            local_namespace_prefix=local_namespace_prefix,
            local_namespace_use_uuid=local_namespace_use_uuid,
            prefix_expansion_label=prefix_expansion_label,
            structured_value_label=structured_value_label,
            structured_uri_label=structured_uri_label,
            build_id=build_id,
            escape_pipes=escape_pipes,
            validate=validate,
            override_uuid=override_uuid,
            idbuilder_options=idbuilder_options,
            reader_options=reader_options,
            value_options=value_options,
            error_file=error_file,
            verbose=verbose,
            very_verbose=very_verbose)

        kn.process()

        return 0

    except SystemExit as e:
        # Chain explicitly so the original exit request stays on __cause__.
        raise KGTKException("Exit requested") from e
    except Exception as e:
        # Chain explicitly so the original traceback is reported as the cause.
        raise KGTKException(str(e)) from e
Пример #4
0
def run(
        input_file: KGTKFiles,
        output_file: KGTKFiles,
        label_file: KGTKFiles,
        input_select_column_name: typing.Optional[str],
        input_select_column_value: typing.Optional[str],
        input_lifting_column_names: typing.List[str],
        output_lifted_column_names: typing.List[str],
        output_lifted_column_suffix: str,
        output_select_column_value: str,
        label_select_column_name: typing.Optional[str],
        label_select_column_value: str,
        label_match_column_name: typing.Optional[str],
        label_value_column_name: typing.Optional[str],
        remove_label_records: bool = False,
        sort_lifted_labels: bool = True,
        suppress_duplicate_labels: bool = True,
        suppress_empty_columns: bool = False,
        ok_if_no_labels: bool = False,
        prefilter_labels: bool = False,
        input_is_presorted: bool = False,
        labels_are_presorted: bool = False,
        errors_to_stdout: bool = False,
        errors_to_stderr: bool = True,
        show_options: bool = False,
        verbose: bool = False,
        very_verbose: bool = False,
        **kwargs  # Whatever KgtkFileOptions and KgtkValueOptions want.
) -> int:
    """Resolve the arguments, build a ``KgtkLift``, and run it.

    The file arguments are resolved to paths through ``KGTKArgumentParser``;
    reader and value option objects are built from ``kwargs``; the remaining
    arguments are forwarded unchanged to the ``KgtkLift`` constructor, and
    ``process()`` is then called on it.

    Args:
        input_file, output_file: Required input and output files.
        label_file: Optional label input file.
        input_select_column_name ... labels_are_presorted: Forwarded to
            ``KgtkLift`` under the same keyword names.
        errors_to_stdout: Send diagnostic output to stdout instead of stderr.
        errors_to_stderr: Accepted for interface compatibility; this function
            does not read it.
        show_options: Print the effective options to the diagnostic stream
            before processing.
        verbose, very_verbose: Forwarded to ``KgtkLift``.
        **kwargs: Source dictionary for the reader and value option objects.

    Returns:
        0 once ``process()`` completes without raising.

    Raises:
        KGTKException: Wrapping any ``SystemExit`` or ``Exception`` raised
            while building or running ``KgtkLift`` (the original is attached
            as ``__cause__``).
    """
    # import modules locally
    from pathlib import Path
    import sys

    from kgtk.exceptions import KGTKException
    from kgtk.lift.kgtklift import KgtkLift
    from kgtk.io.kgtkreader import KgtkReaderOptions
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    input_kgtk_file: Path = KGTKArgumentParser.get_input_file(input_file)
    output_kgtk_file: Path = KGTKArgumentParser.get_output_file(output_file)
    label_kgtk_file: typing.Optional[
        Path] = KGTKArgumentParser.get_optional_input_file(
            label_file, who="KGTK label file")

    # Select where to send error messages, defaulting to stderr.
    error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr

    def emit(text: str) -> None:
        """Write one line to the diagnostic stream and flush it."""
        print(text, file=error_file, flush=True)

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs)
    value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

    # Show the final option structures for debugging and documentation.
    # Every line goes to error_file so the report is not split between
    # stdout and the diagnostic stream.
    if show_options:
        emit("--input-file=%s" % str(input_kgtk_file))
        emit("--output-file=%s" % str(output_kgtk_file))

        # Optional settings are reported only when they were supplied.
        for option, setting in (
                ("--label-file", label_kgtk_file),
                ("--input-select-column", input_select_column_name),
                ("--input-select-value", input_select_column_value),
        ):
            if setting is not None:
                emit("%s=%s" % (option, setting))

        if input_lifting_column_names:
            emit("--columns-to-lift %s" %
                 " ".join(input_lifting_column_names))
        if output_lifted_column_names:
            emit("--columns-to-write %s" %
                 " ".join(output_lifted_column_names))

        emit("--lift-suffix=%s" % output_lifted_column_suffix)
        if output_select_column_value is not None:
            emit("--update-select-value=%s" % output_select_column_value)

        if label_select_column_name is not None:
            emit("--label-select-column=%s" % label_select_column_name)
        emit("--label-select-value=%s" % label_select_column_value)
        if label_match_column_name is not None:
            emit("--label-match-column=%s" % label_match_column_name)
        if label_value_column_name is not None:
            emit("--label-value-column=%s" % label_value_column_name)

        for option, setting in (
                ("--remove-label-records", remove_label_records),
                ("--sort-lifted-labels", sort_lifted_labels),
                ("--suppress-duplicate-labels", suppress_duplicate_labels),
                ("--suppress-empty-columns", suppress_empty_columns),
                ("--ok-if-no-labels", ok_if_no_labels),
                ("--prefilter-labels", prefilter_labels),
                ("--input-file-is-presorted", input_is_presorted),
                ("--label-file-is-presorted", labels_are_presorted),
        ):
            emit("%s=%s" % (option, str(setting)))

        reader_options.show(out=error_file)
        value_options.show(out=error_file)
        emit("=======")

    try:
        kl: KgtkLift = KgtkLift(
            input_file_path=input_kgtk_file,
            label_file_path=label_kgtk_file,
            output_file_path=output_kgtk_file,
            input_select_column_name=input_select_column_name,
            input_select_column_value=input_select_column_value,
            input_lifting_column_names=input_lifting_column_names,
            output_lifted_column_suffix=output_lifted_column_suffix,
            output_select_column_value=output_select_column_value,
            output_lifted_column_names=output_lifted_column_names,
            label_select_column_name=label_select_column_name,
            label_select_column_value=label_select_column_value,
            label_match_column_name=label_match_column_name,
            label_value_column_name=label_value_column_name,
            remove_label_records=remove_label_records,
            sort_lifted_labels=sort_lifted_labels,
            suppress_duplicate_labels=suppress_duplicate_labels,
            suppress_empty_columns=suppress_empty_columns,
            ok_if_no_labels=ok_if_no_labels,
            prefilter_labels=prefilter_labels,
            input_is_presorted=input_is_presorted,
            labels_are_presorted=labels_are_presorted,
            reader_options=reader_options,
            value_options=value_options,
            error_file=error_file,
            verbose=verbose,
            very_verbose=very_verbose,
        )

        kl.process()

        return 0

    except SystemExit as e:
        # Chain explicitly so the original exit request stays on __cause__.
        raise KGTKException("Exit requested") from e
    except Exception as e:
        # Chain explicitly so the original traceback is reported as the cause.
        raise KGTKException(str(e)) from e