Exemplo n.º 1
0
def grow_taxonomy(max_depth=1):
    """Run the taxonomy setup scripts, then deepen the taxonomy to ``max_depth``.

    The deepest level currently stored in ``taxonomy_dag`` is read back
    after the setup scripts have run, and ``grow_taxonomy.sql`` is executed
    once for every level that is still missing.

    Args:
        max_depth: Deepest level the taxonomy should reach. When the stored
            depth is already at least this value, no growth script is run.
    """
    def sql_script(filename):
        # Resolve a script name relative to the SQL scripts folder.
        return join(SQL_SCRIPTS_FOLDER, filename)

    # Aggregation function for arrays of arrays, used to materialize the
    # paths to the roots in the taxonomy.
    # NOTE(review): check_run_if_undef presumably skips the script when its
    # definitions already exist -- confirm against the pg helper.
    pg.check_run_if_undef(sql_script("array_aggregate.sql"))

    # Aggregate the categories info and ground it semantically by trying to
    # match wikipedia articles.
    pg.check_run_if_undef(sql_script("build_grounded_categories.sql"))
    pg.run_file(sql_script("init_taxonomy.sql"))

    deepest_level = int(pg.select("SELECT max(depth) from taxonomy_dag"))
    if deepest_level < max_depth:
        missing_levels = range(deepest_level + 1, max_depth + 1)
    else:
        missing_levels = ()

    # One run of the growth script per missing level.
    for level in missing_levels:
        logging.info("Growing taxonomy to depth=%d", level)
        pg.run_file(sql_script("grow_taxonomy.sql"))
Exemplo n.º 2
0
def grow_taxonomy(max_depth=1):
    """Prepare the taxonomy and grow it one level at a time up to ``max_depth``.

    After the prerequisite and initialization scripts have run, the current
    maximum depth is selected from ``taxonomy_dag``; ``grow_taxonomy.sql`` is
    then run once for each depth between the current one (exclusive) and
    ``max_depth`` (inclusive).

    Args:
        max_depth: Target depth. Nothing is grown when the current depth is
            not strictly below it.
    """
    # Prerequisites, in order:
    #  * aggregation function to manipulate arrays of arrays (to materialize
    #    paths to the roots in the taxonomy);
    #  * categories info aggregated and semantically grounded by trying to
    #    match wikipedia articles.
    prerequisites = (
        "array_aggregate.sql",
        "build_grounded_categories.sql",
    )
    for script_name in prerequisites:
        pg.check_run_if_undef(join(SQL_SCRIPTS_FOLDER, script_name))

    init_script = join(SQL_SCRIPTS_FOLDER, "init_taxonomy.sql")
    pg.run_file(init_script)

    selected = pg.select("SELECT max(depth) from taxonomy_dag")
    current_depth = int(selected)
    if current_depth < max_depth:
        first_new_depth = current_depth + 1
        last_new_depth = max_depth
        for depth in range(first_new_depth, last_new_depth + 1):
            logging.info("Growing taxonomy to depth=%d", depth)
            grow_script = join(SQL_SCRIPTS_FOLDER, "grow_taxonomy.sql")
            pg.run_file(grow_script)
Exemplo n.º 3
0
    # Destination file for the examples export; read back below as
    # ``args.examples_file`` by the "dump_examples" operation. The help text
    # is written as two adjacent string literals, which Python joins into a
    # single string.
    parser.add_argument(
        "--examples-file",
        default="dbpedia-examples.tsv.bz2",
        help="Filename to store the TSV export of the examples text" " categorized using the taxonomy.",
    )

    # Integer depth limit, forwarded to grow_taxonomy() by "build_taxonomy".
    parser.add_argument(
        "--max-depth", default=1, type=int, help="Limit the depth of subcategories to follow from the roots."
    )

    # Optional integer row limit (None by default), forwarded to both
    # check_load_*_data() helpers.
    parser.add_argument(
        "--max-items",
        default=None,
        type=int,
        help="Limit the number of rows to load from DBpedia archives" " (for debug purpose only)",
    )

    args = parser.parse_args()
    # Run the requested operations one after another, in the order they
    # appear in ``args.operations``.
    # NOTE(review): there is no ``else`` branch, so a name matching none of
    # the four cases is ignored here -- confirm whether the (not visible)
    # "operations" argument restricts the accepted values.
    for operation in args.operations:
        if operation == "build_taxonomy":
            # Load the taxonomy data, then grow it up to --max-depth.
            check_load_taxonomy_data(args.max_items)
            grow_taxonomy(args.max_depth)
        elif operation == "build_examples":
            # Load the examples data, then run the dataset SQL script.
            check_load_examples_data(args.max_items)
            pg.run_file(join(SQL_SCRIPTS_FOLDER, "build_dataset.sql"))
        elif operation == "dump_taxonomy":
            # ``args.taxonomy_file`` comes from an option defined outside
            # the visible lines.
            dump_taxonomy(args.taxonomy_file)
        elif operation == "dump_examples":
            dump_examples(args.examples_file)
Exemplo n.º 4
0
        default='dbpedia-taxonomy.tsv',
        help='Filename to store the TSV export of the taxonomy.')

    # Destination file for the examples export; read back below as
    # ``args.examples_file`` by the 'dump_examples' operation. The help text
    # spans two adjacent string literals, which Python joins into one.
    parser.add_argument(
        '--examples-file', default='dbpedia-examples.tsv.bz2',
        help='Filename to store the TSV export of the examples text'
        ' categorized using the taxonomy.')

    # Integer depth limit, forwarded to grow_taxonomy() by 'build_taxonomy'.
    parser.add_argument(
        '--max-depth', default=1, type=int,
        help='Limit the depth of subcategories to follow from the roots.',
    )

    # Optional integer row limit (None by default), forwarded to both
    # check_load_*_data() helpers.
    parser.add_argument(
        '--max-items', default=None, type=int,
        help='Limit the number of rows to load from DBpedia archives'
        ' (for debug purpose only)')

    args = parser.parse_args()
    # Run the requested operations one after another, in the order they
    # appear in ``args.operations``.
    # NOTE(review): there is no ``else`` branch, so a name matching none of
    # the four cases is ignored here -- confirm whether the (not visible)
    # "operations" argument restricts the accepted values.
    for operation in args.operations:
        if operation == 'build_taxonomy':
            # Load the taxonomy data, then grow it up to --max-depth.
            check_load_taxonomy_data(args.max_items)
            grow_taxonomy(args.max_depth)
        elif operation == 'build_examples':
            # Load the examples data, then run the dataset SQL script.
            check_load_examples_data(args.max_items)
            pg.run_file(join(SQL_SCRIPTS_FOLDER, "build_dataset.sql"))
        elif operation == 'dump_taxonomy':
            # NOTE(review): ``args.taxonomy_file`` presumably comes from the
            # option whose definition is cut off just above -- confirm.
            dump_taxonomy(args.taxonomy_file)
        elif operation == 'dump_examples':
            dump_examples(args.examples_file)