def preprocess_query_options(input_files: KGTKFiles, **options):
    """Preprocess and normalize query options before they are passed on.

    The following entries of `options' are added or rewritten:

    * 'loglevel': 0 by default, 1 with '_debug', 2 with '_debug' and '_expert'
      (in which case the options are also printed).
    * 'input_files': the inputs as a list of strings; an empty `input_files'
      is stored unchanged.
    * 'output': '-' becomes sys.stdout, any other string is opened for writing
      via sqlstore.open_to_write; non-string values are stored unchanged.
    * 'optionals': (pattern, where) pairs of the '--opt'/'--optional' entries
      of 'match_options'; only set when there is at least one.
    * 'with': the ('with', 'with_where') pair; only rewritten when at least
      one of the two is not None.
    * 'parameters': result of parse_query_parameters on the parameter options.
    * 'graph_cache_file': the explicit option, else $KGTK_GRAPH_CACHE, else
      DEFAULT_GRAPH_CACHE_FILE.

    Returns the updated options dictionary.
    """
    import_modules()
    debug = options.get('_debug', False)
    expert = options.get('_expert', False)
    loglevel = 1 if debug else 0
    if debug and expert:
        loglevel = 2
        print('OPTIONS:', options)
    options['loglevel'] = loglevel

    # normalize input path objects to strings:
    inputs = input_files
    # do not accept the stdin default for empty inputs, but allow empty inputs for --show-cache:
    if len(inputs) > 0:
        inputs = [str(f) for f in KGTKArgumentParser.get_input_file_list(input_files)]
    options['input_files'] = inputs

    # normalize output to an open writeable stream:
    output = options.get('output')
    if output == '-':
        output = sys.stdout
    if isinstance(output, str):
        output = sqlstore.open_to_write(output, mode='wt')
    options['output'] = output

    optionals = [(pat, where)
                 for (opt, pat, where) in options.get('match_options', [])
                 if opt in ('--opt', '--optional')]
    if optionals:
        options['optionals'] = optionals

    # combine the two 'with' options into one pair if at least one of them was supplied:
    with_ = (options.get('with'), options.get('with_where'))
    if with_.count(None) < len(with_):
        options['with'] = with_

    options['parameters'] = parse_query_parameters(regular=options.get('regular_paras') or [],
                                                   string=options.get('string_paras') or [],
                                                   lqstring=options.get('lqstring_paras') or [])

    # graph cache precedence: explicit option, then environment, then built-in default:
    graph_cache = options.get('graph_cache_file')
    if not graph_cache:
        graph_cache = os.getenv('KGTK_GRAPH_CACHE')
    if not graph_cache:
        graph_cache = DEFAULT_GRAPH_CACHE_FILE
    options['graph_cache_file'] = graph_cache

    return options
def __call__(self, parser, namespace, values, option_string=None):
    """Record this option's value under the last input file seen so far.

    The values are collected on `namespace' in `input_file_options', a
    dictionary from input file name to a dictionary keyed by `self.dest'.
    Raises a KGTKException if the input file list is empty.
    """
    per_file_options = getattr(namespace, 'input_file_options', {}) or {}
    seen_inputs = KGTKArgumentParser.get_input_file_list(getattr(namespace, 'input_files', []))
    if len(seen_inputs) < 1:
        raise KGTKException('out-of-place input option: %s' % option_string)

    # normalize the path object to a string; the option attaches to the last input:
    target_file = str(seen_inputs[-1])

    # boolean args carry no explicit value (also requires nargs=0):
    option_value = True if self.type == bool else values

    # self.dest is the key for this particular option:
    options_for_file = per_file_options.setdefault(target_file, {})
    options_for_file[self.dest] = option_value
    setattr(namespace, 'input_file_options', per_file_options)
def run(input_files: KGTKFiles, output=None, gz=False, bz2=False, xz=False, _debug=False):
    """Run zconcat according to the provided command-line arguments.

    Returns the exit code of the executed shell commands.  A SIGPIPE signal
    exception only flushes stdout (nothing is returned in that case); any
    other exception is re-raised as a KGTKException.
    """
    # TO DO: figure out how to properly access shared --debug option
    try:
        file_list = KGTKArgumentParser.get_input_file_list(input_files)
        # normalize path objects to strings:
        inputs: typing.List[str] = list(map(str, file_list))
        commands = build_command(inputs=inputs, output=output, gz=gz, bz2=bz2, xz=xz)
        outcome = run_sh_commands(commands, debug=_debug)
        return outcome.exit_code
    except sh.SignalException_SIGPIPE:
        # cleanup in case we piped and terminated prematurely:
        sys.stdout.flush()
    except Exception as e:
        # for debugging, the traceback can be printed here with
        # traceback.print_tb(sys.exc_info()[2], 10)
        message = 'INTERNAL ERROR: ' + str(e) + '\n'
        raise KGTKException(message)
def run(
        input_file: KGTKFiles,
        output_file: KGTKFiles,
        reject_file: KGTKFiles,
        namespace_file: KGTKFiles,
        updated_namespace_file: KGTKFiles,
        namespace_id_prefix: str,
        namespace_id_use_uuid: bool,
        namespace_id_counter: int,
        namespace_id_zfill: int,
        output_only_used_namespaces: bool,
        allow_lax_uri: bool,
        local_namespace_prefix: str,
        local_namespace_use_uuid: bool,
        prefix_expansion_label: str,
        structured_value_label: str,
        structured_uri_label: str,
        newnode_prefix: str,
        newnode_use_uuid: bool,
        newnode_counter: int,
        newnode_zfill: int,
        build_id: bool,
        escape_pipes: bool,
        validate: bool,
        override_uuid: typing.Optional[str],
        errors_to_stdout: bool = False,
        errors_to_stderr: bool = True,
        show_options: bool = False,
        verbose: bool = False,
        very_verbose: bool = False,
        **kwargs  # Whatever KgtkFileOptions and KgtkValueOptions want.
) -> int:
    """Run KgtkNtriples over the input files with the supplied options.

    The file arguments are resolved through KGTKArgumentParser, the option
    structures are built from `kwargs', and, when `show_options' is set, all
    options are echoed to the error file before processing starts.

    Returns 0 on success.  SystemExit and any other exception raised while
    processing are converted into a KGTKException.
    """
    # import modules locally
    from pathlib import Path
    import sys
    from kgtk.exceptions import KGTKException
    from kgtk.imports.kgtkntriples import KgtkNtriples
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.io.kgtkwriter import KgtkWriter
    from kgtk.reshape.kgtkidbuilder import KgtkIdBuilder, KgtkIdBuilderOptions
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    # Resolve the file arguments into paths.
    input_file_paths: typing.List[Path] = KGTKArgumentParser.get_input_file_list(input_file)
    output_kgtk_file: Path = KGTKArgumentParser.get_output_file(output_file)
    reject_file_path: typing.Optional[Path] = \
        KGTKArgumentParser.get_optional_output_file(reject_file, who="KGTK reject file")
    namespace_kgtk_file: typing.Optional[Path] = \
        KGTKArgumentParser.get_optional_input_file(namespace_file, who="KGTK namespace file")
    updated_namespace_kgtk_file: typing.Optional[Path] = \
        KGTKArgumentParser.get_optional_output_file(updated_namespace_file,
                                                    who="KGTK updated namespace file")

    # Select where to send error messages, defaulting to stderr.
    error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr

    def show(text: str) -> None:
        # Every option line goes to the error file and is flushed immediately.
        print(text, file=error_file, flush=True)

    # Build the option structures.
    idbuilder_options: KgtkIdBuilderOptions = KgtkIdBuilderOptions.from_dict(kwargs)
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs)
    value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

    # Show the final option structures for debugging and documentation.
    if show_options:
        show("--input-files %s" % " ".join([str(path) for path in input_file_paths]))
        show("--output-file=%s" % str(output_kgtk_file))
        if reject_file_path is not None:
            show("--reject-file=%s" % str(reject_file_path))
        if namespace_kgtk_file is not None:
            show("--namespace-file=%s" % str(namespace_kgtk_file))
        if updated_namespace_kgtk_file is not None:
            show("--updated-namespace-file=%s" % str(updated_namespace_kgtk_file))

        show("--namespace-id-prefix %s" % namespace_id_prefix)
        show("--namespace-id-use-uuid %s" % str(namespace_id_use_uuid))
        show("--namespace-id-counter %s" % str(namespace_id_counter))
        show("--namespace-id-zfill %s" % str(namespace_id_zfill))
        show("--output-only-used-namespaces %s" % str(output_only_used_namespaces))

        show("--allow-lax-uri %s" % str(allow_lax_uri))

        show("--local-namespace-prefix %s" % local_namespace_prefix)
        show("--local-namespace-use-uuid %s" % str(local_namespace_use_uuid))

        show("--prefix-expansion-label %s" % prefix_expansion_label)
        show("--structured-value-label %s" % structured_value_label)
        show("--structured-uri-label %s" % structured_uri_label)

        show("--newnode-prefix %s" % newnode_prefix)
        show("--newnode-use-uuid %s" % str(newnode_use_uuid))
        show("--newnode-counter %s" % str(newnode_counter))
        show("--newnode-zfill %s" % str(newnode_zfill))

        show("--build-id=%s" % str(build_id))
        show("--escape-pipes=%s" % str(escape_pipes))
        show("--validate=%s" % str(validate))
        show("--override-uuid=%s" % str(override_uuid))

        idbuilder_options.show(out=error_file)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)
        show("=======")

    try:
        importer: KgtkNtriples = KgtkNtriples(
            input_file_paths=input_file_paths,
            output_file_path=output_kgtk_file,
            reject_file_path=reject_file_path,
            updated_namespace_file_path=updated_namespace_kgtk_file,
            namespace_file_path=namespace_kgtk_file,
            namespace_id_prefix=namespace_id_prefix,
            namespace_id_use_uuid=namespace_id_use_uuid,
            namespace_id_counter=namespace_id_counter,
            namespace_id_zfill=namespace_id_zfill,
            output_only_used_namespaces=output_only_used_namespaces,
            newnode_prefix=newnode_prefix,
            newnode_use_uuid=newnode_use_uuid,
            newnode_counter=newnode_counter,
            newnode_zfill=newnode_zfill,
            allow_lax_uri=allow_lax_uri,
            local_namespace_prefix=local_namespace_prefix,
            local_namespace_use_uuid=local_namespace_use_uuid,
            prefix_expansion_label=prefix_expansion_label,
            structured_value_label=structured_value_label,
            structured_uri_label=structured_uri_label,
            build_id=build_id,
            escape_pipes=escape_pipes,
            validate=validate,
            override_uuid=override_uuid,
            idbuilder_options=idbuilder_options,
            reader_options=reader_options,
            value_options=value_options,
            error_file=error_file,
            verbose=verbose,
            very_verbose=very_verbose,
        )
        importer.process()
        return 0

    except SystemExit:
        raise KGTKException("Exit requested")
    except Exception as err:
        raise KGTKException(str(err))
def run(input_files: KGTKFiles, **options):
    """Run Kypher query according to the provided command-line arguments.

    Opens the SQLite store named by the 'graph_cache_file' option, builds a
    KgtkQuery from the query-related options and either writes its
    explanation (when 'explain' is given) or executes it and writes the
    result rows as tab-separated lines to the 'output' option.

    Raises a KGTKException if no input is supplied; any other exception
    (except a SIGPIPE signal exception, which is absorbed) is converted into
    a KGTKException.
    """
    try:
        import_modules()
        debug = options.get('_debug', False)
        expert = options.get('_expert', False)
        loglevel = 1 if debug else 0
        if debug and expert:
            loglevel = 2
            print('OPTIONS:', options)

        # normalize path objects to strings:
        inputs = [str(f) for f in KGTKArgumentParser.get_input_file_list(input_files)]
        if len(inputs) == 0:
            raise KGTKException('At least one input needs to be supplied')

        output = options.get('output')
        if output == '-':
            output = sys.stdout
        if isinstance(output, str):
            output = sqlstore.open_to_write(output, mode='wt')

        parameters = parse_query_parameters(regular=options.get('regular_paras') or [],
                                            string=options.get('string_paras') or [],
                                            lqstring=options.get('lqstring_paras') or [])

        # bind `store' before entering the try block so that the cleanup code
        # below does not fail with an unbound name (and thereby hide the real
        # error) if opening the store itself raises:
        store = None
        try:
            graph_cache = options.get('graph_cache_file')
            store = sqlstore.SqliteStore(graph_cache, create=not os.path.exists(graph_cache), loglevel=loglevel)

            query = kyquery.KgtkQuery(inputs, store, loglevel=loglevel,
                                      options=options.get('input_file_options'),
                                      query=options.get('query'),
                                      match=options.get('match'),
                                      where=options.get('where'),
                                      ret=options.get('return_'),
                                      order=options.get('order'),
                                      skip=options.get('skip'),
                                      limit=options.get('limit'),
                                      parameters=parameters,
                                      index=options.get('index'))

            explain = options.get('explain')
            if explain is not None:
                result = query.explain(explain)
                output.write(result)
            else:
                result = query.execute()
                # we are forcing \n line endings here instead of \r\n, since those
                # can be re/imported efficiently with the new SQLite import command;
                # we force `escapechar' back to None to avoid generation of double
                # backslashes as in 'Buffalo \'66', which in turn will now raise errors
                # if separators in fields are encountered (which seems what we want):
                csvwriter = csv.writer(output, dialect=None, delimiter='\t',
                                       quoting=csv.QUOTE_NONE, quotechar=None,
                                       lineterminator='\n',
                                       escapechar=None)
                if not options.get('no_header'):
                    csvwriter.writerow(query.result_header)
                csvwriter.writerows(result)
            output.flush()
        finally:
            try:
                if store is not None:
                    store.close()
            finally:
                # close the output even if closing the store failed:
                if output is not None and output is not sys.stdout:
                    output.close()

    except sh.SignalException_SIGPIPE:
        # hack to work around Python3 issue when stdout is gone when we try to report an exception;
        # without this we get an ugly 'Exception ignored...' msg when we quit with head or a pager:
        sys.stdout = os.fdopen(1)
    except KGTKException:
        # already in the form callers expect, pass it on unchanged:
        raise
    except Exception as e:
        raise KGTKException(str(e) + '\n')
def run(input_files: KGTKFiles,
        errors_to_stdout: bool = False,
        errors_to_stderr: bool = False,
        header_only: bool = False,
        show_options: bool = False,
        verbose: bool = False,
        very_verbose: bool = False,
        **kwargs  # Whatever KgtkReaderOptions and KgtkValueOptions want.
) -> int:
    """Validate each input file by reading it with KgtkReader.

    Every input file is opened with the reader and value options built from
    `kwargs'.  With `header_only' the reader is closed right after opening;
    otherwise all of its rows are read and counted.  Messages are written to
    stderr when `errors_to_stderr' is set and to stdout otherwise
    (`errors_to_stdout' is accepted but not consulted here).

    Returns 0 when all files were read.  SystemExit and any other exception
    are converted into a KGTKException.
    """
    # import modules locally
    from kgtk.exceptions import KGTKException

    kgtk_files: typing.List[Path] = KGTKArgumentParser.get_input_file_list(input_files)

    # Select where to send error messages, defaulting to stdout.
    error_file: typing.TextIO = sys.stderr if errors_to_stderr else sys.stdout

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs)
    value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

    # Show the final option structures for debugging and documentation.
    if show_options:
        print("--input-files: %s" % " ".join((str(kgtk_file) for kgtk_file in kgtk_files)), file=error_file)
        print("--header-only=%s" % str(header_only), file=error_file)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)
        print("=======", file=error_file, flush=True)

    try:
        kgtk_file: Path
        for kgtk_file in kgtk_files:
            if verbose:
                # The separator goes to the same stream as the messages it separates.
                print("\n====================================================", file=error_file, flush=True)
                if str(kgtk_file) != "-":
                    print("Validating '%s'" % str(kgtk_file), file=error_file, flush=True)
                else:
                    print("Validating from stdin", file=error_file, flush=True)

            kr: KgtkReader = KgtkReader.open(kgtk_file,
                                             error_file=error_file,
                                             options=reader_options,
                                             value_options=value_options,
                                             verbose=verbose,
                                             very_verbose=very_verbose)

            if header_only:
                kr.close()
                if verbose:
                    print("Validated the header only.", file=error_file, flush=True)
            else:
                # Reading every row is the validation; only the count is kept.
                line_count: int = 0
                for _ in kr:
                    line_count += 1
                if verbose:
                    print("Validated %d data lines" % line_count, file=error_file, flush=True)
        return 0

    except SystemExit:
        raise KGTKException("Exit requested")
    except Exception as e:
        raise KGTKException(str(e))
def run(input_files: KGTKFiles,
        output_file: KGTKFiles,
        output_format: typing.Optional[str],
        output_column_names: typing.Optional[typing.List[str]],
        old_column_names: typing.Optional[typing.List[str]],
        new_column_names: typing.Optional[typing.List[str]],
        errors_to_stdout: bool = False,
        errors_to_stderr: bool = True,
        show_options: bool = False,
        verbose: bool = False,
        very_verbose: bool = False,
        **kwargs  # Whatever KgtkFileOptions and KgtkValueOptions want.
) -> int:
    """Run KgtkCat over the input files and write to the output file.

    The column options are checked for consistency first: --output-columns
    excludes --old-columns/--new-columns, and the latter two must be given
    together with the same number of names.

    Returns 0 on success.  Raises a KGTKException for inconsistent column
    options; SystemExit and any other exception raised while processing are
    converted into a KGTKException.
    """
    # import modules locally
    from kgtk.exceptions import KGTKException
    from kgtk.join.kgtkcat import KgtkCat
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.io.kgtkwriter import KgtkWriter
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    input_file_paths: typing.List[Path] = KGTKArgumentParser.get_input_file_list(input_files)
    output_file_path: Path = KGTKArgumentParser.get_output_file(output_file)

    # Select where to send error messages, defaulting to stderr.
    error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr

    # print("cat running", file=error_file, flush=True) # ***

    # TODO: check that at most one input file is stdin?

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs)
    value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

    # Show the final option structures for debugging and documentation.
    if show_options:
        print("--input-files %s" % " ".join((str(input_file_path) for input_file_path in input_file_paths)), file=error_file, flush=True)
        print("--output-file=%s" % str(output_file_path), file=error_file, flush=True)
        if output_format is not None:
            print("--output-format=%s" % output_format, file=error_file, flush=True)
        if output_column_names is not None:
            print("--output-columns %s" % " ".join(output_column_names), file=error_file, flush=True)
        if old_column_names is not None:
            print("--old-columns %s" % " ".join(old_column_names), file=error_file, flush=True)
        if new_column_names is not None:
            print("--new-columns %s" % " ".join(new_column_names), file=error_file, flush=True)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)
        print("=======", file=error_file, flush=True)

    # Check for consistent options.  argparse doesn't support this yet.
    have_output_columns: bool = output_column_names is not None and len(output_column_names) > 0
    have_old_columns: bool = old_column_names is not None and len(old_column_names) > 0
    have_new_columns: bool = new_column_names is not None and len(new_column_names) > 0
    if have_output_columns:
        if have_old_columns or have_new_columns:
            raise KGTKException("When --output-columns is used, --old-columns and --new-columns may not be used.")
    elif have_old_columns ^ have_new_columns:
        raise KGTKException("Both --old-columns and --new-columns must be used when either is used.")
    elif have_old_columns and have_new_columns:
        if len(old_column_names) != len(new_column_names):
            raise KGTKException("Both --old-columns and --new-columns must have the same number of columns.")

    try:
        kc: KgtkCat = KgtkCat(input_file_paths=input_file_paths,
                              output_path=output_file_path,
                              output_format=output_format,
                              output_column_names=output_column_names,
                              old_column_names=old_column_names,
                              new_column_names=new_column_names,
                              reader_options=reader_options,
                              value_options=value_options,
                              error_file=error_file,
                              verbose=verbose,
                              very_verbose=very_verbose
        )

        kc.process()

        # print("cat done", file=error_file, flush=True) # ***

        return 0

    except SystemExit:
        raise KGTKException("Exit requested")
    except Exception as e:
        raise KGTKException(str(e))
def run(input_file: KGTKFiles,
        entity_label_files: KGTKFiles,
        output_file: KGTKFiles,

        label_properties: typing.Optional[typing.List[str]],
        description_properties: typing.Optional[typing.List[str]],
        isa_properties: typing.Optional[typing.List[str]],
        has_properties: typing.Optional[typing.List[str]],
        property_values: typing.Optional[typing.List[str]],
        sentence_label: str,
        explain: bool,
        presorted: bool,
        add_entity_labels_from_input: bool,

        errors_to_stdout: bool = False,
        errors_to_stderr: bool = True,
        show_options: bool = False,
        verbose: bool = False,
        very_verbose: bool = False,

        **kwargs  # Whatever KgtkFileOptions and KgtkValueOptions want.
) -> int:
    """Run Lexicalize over the input file and write to the output file.

    Property lists passed as None are replaced by the module defaults.  The
    entity label files, when any are given, are loaded into the Lexicalize
    object before the input file is processed, either as presorted or as
    unsorted input.

    Returns 0 on success.  Any exception raised while reading or writing is
    converted into a KGTKException; the reader and writer are closed in
    either case.
    """
    # import modules locally
    from pathlib import Path
    import sys

    from kgtk.exceptions import KGTKException
    from kgtk.gt.lexicalize_utils import Lexicalize
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.io.kgtkwriter import KgtkWriter
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    input_kgtk_file: Path = KGTKArgumentParser.get_input_file(input_file)
    entity_label_kgtk_files: typing.List[Path] = KGTKArgumentParser.get_input_file_list(
        entity_label_files,
        who="The entity label file(s)",
        default_stdin=False)
    output_kgtk_file: Path = KGTKArgumentParser.get_output_file(output_file)

    # Select where to send error messages, defaulting to stderr.
    error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr

    def show(text: str) -> None:
        # All messages go to the error file and are flushed immediately.
        print(text, file=error_file, flush=True)

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs)
    value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

    # Fall back to the module defaults for property lists that were not supplied.
    label_properties = DEFAULT_LABEL_PROPERTIES if label_properties is None else label_properties
    description_properties = DEFAULT_DESCRIPTION_PROPERTIES if description_properties is None else description_properties
    isa_properties = DEFAULT_ISA_PROPERTIES if isa_properties is None else isa_properties
    has_properties = DEFAULT_HAS_PROPERTIES if has_properties is None else has_properties
    property_values = DEFAULT_PROPERTY_VALUES if property_values is None else property_values

    have_entity_label_files: bool = len(entity_label_kgtk_files) > 0

    # Show the final option structures for debugging and documentation.
    if show_options:
        show("--input-file=%s" % str(input_kgtk_file))
        if have_entity_label_files:
            show("--entity-label-files %s" % " ".join([str(f) for f in entity_label_kgtk_files]))
        show("--output-file=%s" % str(output_kgtk_file))

        if len(label_properties) > 0:
            show("--label-properties %s" % " ".join(label_properties))

        if len(description_properties) > 0:
            show("--description-properties %s" % " ".join(description_properties))

        if len(isa_properties) > 0:
            show("--isa-properties %s" % " ".join(isa_properties))

        if len(has_properties) > 0:
            show("--has-properties %s" % " ".join(has_properties))

        if len(property_values) > 0:
            show("--property-values %s" % " ".join(property_values))

        show("--sentence-label=%s" % str(sentence_label))
        show("--explain=%s" % str(explain))
        show("--presorted=%s" % str(presorted))

        reader_options.show(out=error_file)
        value_options.show(out=error_file)
        show("=======")

    lexer: Lexicalize = Lexicalize(label_properties,
                                   description_properties,
                                   isa_properties,
                                   has_properties,
                                   property_values,
                                   sentence_label,
                                   explain=explain,
                                   error_file=error_file,
                                   verbose=verbose,
                                   very_verbose=very_verbose)
    if have_entity_label_files:
        lexer.load_entity_label_files(entity_label_kgtk_files,
                                      error_file,
                                      reader_options,
                                      value_options,
                                      label_properties=label_properties,
                                      verbose=verbose)

    reader: typing.Optional[KgtkReader] = None
    writer: typing.Optional[KgtkWriter] = None

    try:
        if verbose:
            show("Opening the input file %s" % str(input_kgtk_file))
        reader = KgtkReader.open(input_kgtk_file,
                                 options=reader_options,
                                 value_options=value_options,
                                 error_file=error_file,
                                 verbose=verbose,
                                 very_verbose=very_verbose,
        )

        # The three edge columns are required.
        if reader.node1_column_idx < 0:
            raise KGTKException("Missing column: node1 or alias")
        if reader.label_column_idx < 0:
            raise KGTKException("Missing column: label or alias")
        if reader.node2_column_idx < 0:
            raise KGTKException("Missing column: node2 or alias")

        if verbose:
            show("node1 column index = {}".format(reader.node1_column_idx))
            show("label column index = {}".format(reader.label_column_idx))
            show("node2 column index = {}".format(reader.node2_column_idx))

        # Work on a copy so the module-level column list is not modified.
        output_columns: typing.List[str] = OUTPUT_COLUMNS.copy()
        if explain:
            output_columns.append("explaination")
            if verbose:
                show("Including an explaination column in the output.")

        if verbose:
            show("Opening the output file %s" % str(output_kgtk_file))
        writer = KgtkWriter.open(output_columns,
                                 output_kgtk_file,
                                 require_all_columns=True,
                                 prohibit_extra_columns=True,
                                 fill_missing_columns=False,
                                 gzip_in_parallel=False,
                                 verbose=verbose,
                                 very_verbose=very_verbose,
        )

        if presorted:
            lexer.process_presorted_input(reader, writer)
        else:
            lexer.process_unsorted_input(reader, writer, add_entity_labels=add_entity_labels_from_input)

        return 0

    except Exception as err:
        raise KGTKException(str(err))

    finally:
        # Close the writer first, then the reader.
        if writer is not None:
            writer.close()

        if reader is not None:
            reader.close()