示例#1
0
def edge_summary(filepath, input_type, max_rows, output):
    """
    Loads and summarizes a knowledge graph edge set
    \f

    Every edge is reduced to a six-field signature (subject prefix, subject
    category, edge label, relation, object prefix, object category). The
    signatures are counted and the ``max_rows`` most frequent ones are
    rendered as a table with a trailing ``Frequency`` column.

    Parameters
    ----------
    filepath
        Input file; passed to ``build_transformer`` and to the
        transformer's ``parse`` method
    input_type
        Input file type, forwarded to ``build_transformer``
    max_rows
        Number of most frequent signatures to keep (forwarded to
        ``Counter.most_common``)
    output
        File to write the table to; when ``None`` the table is echoed
        with ``click.echo`` instead

    """
    transformer = build_transformer(filepath, input_type)
    transformer.parse(filepath)
    graph = transformer.graph

    signatures = []
    progress = click.progressbar(graph.edges(data=True),
                                 label='Reading knowledge graph')
    with progress as edges:
        for source, target, attrs in edges:
            # NOTE(review): `graph.node` looks like the pre-2.4 networkx
            # accessor -- confirm against the pinned networkx version.
            source_data = graph.node[source]
            target_data = graph.node[target]

            source_prefix = stringify(get_prefix(source))
            target_prefix = stringify(get_prefix(target))

            source_category = stringify(source_data.get('category'))
            target_category = stringify(target_data.get('category'))
            label = stringify(attrs.get('edge_label'))
            relation = stringify(attrs.get('relation'))

            signatures.append((
                source_prefix,
                source_category,
                label,
                relation,
                target_prefix,
                target_category,
            ))

    # most_common() already yields (signature, frequency) pairs ordered by
    # descending frequency, so the table rows can be built straight from it.
    ranked = Counter(signatures).most_common(max_rows)

    table_data = [[
        'Subject Prefix',
        'Subject Category',
        'Edge Label',
        'Relation',
        'Object Prefix',
        'Object Category',
        'Frequency',
    ]]
    for signature, frequency in ranked:
        table_data.append(list(signature) + [frequency])

    rendered = AsciiTable(table_data).table

    if output is None:
        click.echo(rendered)
        return
    file_write(output, rendered)
示例#2
0
def edge_summary(config: dict, filepath: str, input_type: str, max_rows: int,
                 output: str):
    """
    Loads and summarizes a knowledge graph edge set, where the input is a file.
    \f

    For each edge a tuple of (subject prefix, subject category, edge label,
    relation, object prefix, object category) is collected. The tuples are
    counted and the ``max_rows`` most frequent ones are rendered as a table.

    Parameters
    ----------
    config: dict
        A dictionary containing the configuration for kgx.cli
        (not read inside this function)
    filepath: str
        Input file
    input_type: str
        Input file type
    max_rows: int
        Max number of rows to display in the output
    output: str
        Where to write the output; when ``None`` the table is echoed
        with ``click.echo``

    """
    transformer = build_transformer(filepath, input_type)
    transformer.parse(filepath)
    kg = transformer.graph

    column_titles = [
        'Subject Prefix', 'Subject Category', 'Edge Label', 'Relation',
        'Object Prefix', 'Object Category', 'Frequency'
    ]

    edge_keys = []
    with click.progressbar(kg.edges(data=True),
                           label='Reading knowledge graph') as edge_bar:
        for subj, obj, edge_data in edge_bar:
            # NOTE(review): `kg.node` looks like the pre-2.4 networkx
            # accessor -- confirm against the pinned networkx version.
            subj_data = kg.node[subj]
            obj_data = kg.node[obj]

            subj_prefix = stringify(get_prefix(subj))
            obj_prefix = stringify(get_prefix(obj))

            subj_category = stringify(subj_data.get('category'))
            obj_category = stringify(obj_data.get('category'))
            label = stringify(edge_data.get('edge_label'))
            rel = stringify(edge_data.get('relation'))

            key = (subj_prefix, subj_category, label, rel, obj_prefix,
                   obj_category)
            edge_keys.append(key)

    # Keep only the `max_rows` most frequent keys, in descending order.
    frequencies = OrderedDict(Counter(edge_keys).most_common(max_rows))

    body = []
    for key, count in frequencies.items():
        body.append([*key, count])

    table_text = AsciiTable([column_titles] + body).table

    if output is None:
        click.echo(table_text)
    else:
        file_write(output, table_text)
示例#3
0
def node_summary(config: dict, filepath: str, input_type: str, max_rows: int,
                 output: str):
    """
    Loads and summarizes a knowledge graph node set, where the input is a file.
    \f

    The node and edge counts are echoed to stdout once. When ``output`` is
    given, the counts (plus the xref-prefix line, if any xrefs were found)
    are written to that file in a single write and the three summary tables
    are appended to it; otherwise the xref line and the tables are echoed.

    Three tables are produced: (prefix, category) frequency limited to the
    ``max_rows`` most common pairs, then per-category and per-prefix totals
    derived from those same pairs.

    Parameters
    ----------
    config: dict
        A dictionary containing the configuration for kgx.cli
        (not read inside this function)
    filepath: str
        Input file
    input_type: str
        Input file type
    max_rows: int
        Max number of rows to display in the output
    output: str
        Where to write the output (stdout, by default)

    """
    t = build_transformer(filepath, input_type)
    t.parse(filepath)

    g = t.graph

    def emit_table(headers, rows):
        # One place for the "append to file or echo" decision shared by
        # all three tables.
        # NOTE(review): tables are appended without a separating newline,
        # as before -- confirm whether file_write adds one.
        table = AsciiTable(headers + rows).table
        if output is not None:
            file_write(output, table, mode='a')
        else:
            click.echo(table)

    tuples = []
    xrefs = set()
    with click.progressbar(g.nodes(data=True),
                           label='Reading knowledge graph') as bar:
        for n, data in bar:
            if 'same_as' in data:
                for xref in data['same_as']:
                    xrefs.add(get_prefix(xref))

            category = data.get('category')
            prefix = get_prefix(n)

            if isinstance(category, (list, set)):
                # Work on a copy so the graph's node data is not mutated,
                # and only strip 'named_thing' from real collections: a
                # plain string category has no .remove() and used to crash.
                labels = list(category)
                if len(labels) > 1 and 'named_thing' in labels:
                    labels.remove('named_thing')
                category = ", ".join("'{}'".format(c) for c in labels)

            if prefix is not None:
                prefix = "'{}'".format(prefix)

            tuples.append((prefix, category))

    node_count_line = '|nodes|: {}'.format(len(g.nodes()))
    edge_count_line = '|edges|: {}'.format(len(g.edges()))

    # Counts go to stdout exactly once, regardless of `output`.
    click.echo(node_count_line)
    click.echo(edge_count_line)

    # Sorted so the xref line is stable between runs (set order is not).
    xrefs = sorted(x for x in xrefs if x is not None)
    xref_line = None
    if xrefs:
        xref_line = 'xref prefixes: {}'.format(', '.join(xrefs))

    if output is not None:
        header_lines = [node_count_line, edge_count_line]
        if xref_line is not None:
            header_lines.append(xref_line)
        # Single write: successive default-mode file_write calls would
        # presumably overwrite one another (later calls pass mode='a'
        # explicitly) -- TODO confirm file_write's default mode.
        file_write(output, '\n'.join(header_lines) + '\n')
    elif xref_line is not None:
        click.echo(xref_line)

    tuple_count = OrderedDict(Counter(tuples).most_common(max_rows))

    emit_table([['Prefix', 'Category', 'Frequency']],
               [[*k, v] for k, v in tuple_count.items()])

    # Totals are derived from the truncated (max_rows) counts, not from
    # every node in the graph.
    category_count = defaultdict(int)
    prefix_count = defaultdict(int)

    for (prefix, category), frequency in tuple_count.items():
        category_count[category] += frequency
        prefix_count[prefix] += frequency

    emit_table([['Category', 'Frequency']],
               [[k, v] for k, v in category_count.items()])

    emit_table([['Prefixes', 'Frequency']],
               [[k, v] for k, v in prefix_count.items()])
示例#4
0
def node_summary(filepath, input_type, max_rows, output):
    """
    Loads and summarizes a knowledge graph node set
    \f

    If any ``same_as`` xref prefixes are found, a line listing them is
    written to ``output`` (or echoed). Three tables follow, appended to
    ``output`` or echoed: (prefix, category) frequency limited to the
    ``max_rows`` most common pairs, then per-category and per-prefix totals
    derived from those same pairs.

    Parameters
    ----------
    filepath
        Input file; passed to ``build_transformer`` and to the
        transformer's ``parse`` method
    input_type
        Input file type, forwarded to ``build_transformer``
    max_rows
        Number of most frequent (prefix, category) pairs to keep
    output
        File to write the summary to; when ``None`` everything is echoed
        with ``click.echo`` instead

    """
    t = build_transformer(filepath, input_type)
    t.parse(filepath)

    g = t.graph

    def emit_table(headers, rows):
        # One place for the "append to file or echo" decision shared by
        # all three tables.
        table = AsciiTable(headers + rows).table
        if output is not None:
            file_write(output, table, mode='a')
        else:
            click.echo(table)

    tuples = []
    xrefs = set()
    with click.progressbar(g.nodes(data=True),
                           label='Reading knowledge graph') as bar:
        for n, data in bar:
            if 'same_as' in data:
                for xref in data['same_as']:
                    xrefs.add(get_prefix(xref))

            category = data.get('category')
            prefix = get_prefix(n)

            if isinstance(category, (list, set)):
                # Work on a copy so the graph's node data is not mutated,
                # and only strip 'named thing' from real collections: a
                # plain string category has no .remove() and used to crash.
                labels = list(category)
                if len(labels) > 1 and 'named thing' in labels:
                    labels.remove('named thing')
                category = ", ".join("'{}'".format(c) for c in labels)

            if prefix is not None:
                prefix = "'{}'".format(prefix)

            tuples.append((prefix, category))

    # Sorted so the xref line is stable between runs (set order is not).
    xrefs = sorted(x for x in xrefs if x is not None)
    if xrefs:
        line = 'xref prefixes: {}'.format(', '.join(xrefs))
        if output is not None:
            # NOTE(review): written without a trailing newline, as before;
            # confirm whether file_write adds one before the appended tables.
            file_write(output, line)
        else:
            click.echo(line)

    tuple_count = OrderedDict(Counter(tuples).most_common(max_rows))

    emit_table([['Prefix', 'Category', 'Frequency']],
               [[*k, v] for k, v in tuple_count.items()])

    # Totals are derived from the truncated (max_rows) counts, not from
    # every node in the graph.
    category_count = defaultdict(int)
    prefix_count = defaultdict(int)

    for (prefix, category), frequency in tuple_count.items():
        category_count[category] += frequency
        prefix_count[prefix] += frequency

    emit_table([['Category', 'Frequency']],
               [[k, v] for k, v in category_count.items()])

    emit_table([['Prefixes', 'Frequency']],
               [[k, v] for k, v in prefix_count.items()])