示例#1
0
            intercept = val
        elif opt == "--lambda":
            param = val
        elif opt == "--lambda_range":
            param_range = val.split(",")
        elif opt == "--export_params":
            export_params = val
        elif opt in ("-l", "--log"):
            log_file = val
        elif opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        else:
            usage(1)

    log_utils.config_logging(log_file)

    utils.assert_option_not_none(in_file, "Input file required", usage)
    utils.assert_option_not_none(out_dir, "Output directory required", usage)
    utils.assert_option_not_none(model, "Model to be trained required", usage)
    utils.assert_option_not_none(arg_space,
                                 "Argument space(s) file(s) required", usage)
    utils.assert_option_not_none(phrase_space, "Phrase space file required",
                                 usage)

    crossvalidation = eval(crossvalidation)
    intercept = eval(intercept)
    utils.assert_bool(intercept, "intercept must be True/False", usage)
    utils.assert_bool(crossvalidation, "crossvalidation must be True/False",
                      usage)
示例#2
0
            in_dir = val
        elif opt in ("-m", "--sim_measures"):
            sim_measures = val.split(",")
        elif opt in ("-s", "--space"):
            spaces = val.split(",")
        elif opt in ("-c", "--columns"):
            columns = val.split(",")
        elif opt in ("-l", "--log"):
            log_file = val
        elif opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        else:
            usage(1)

    log_utils.config_logging(log_file)

    utils.assert_option_not_none(in_file, "Input file required", usage)
    utils.assert_option_not_none(out_dir, "Output directory required", usage)
    utils.assert_option_not_none(sim_measures, "Similarity measures required", usage)
    utils.assert_option_not_none(columns, "Columns to be read from input file required", usage)

    if not in_dir is None:
        compute_sim_batch(in_file, columns, out_dir, sim_measures, in_dir)
    else:
        utils.assert_option_not_none(spaces, "Semantic space file required", usage)
        compute_sim(in_file, columns, out_dir, sim_measures, spaces)


# Script entry point: hand the full argument vector to main().
if __name__ == "__main__":
    main(sys.argv)
示例#3
0
def main(sys_argv):
    """Parse options for the ``build_peripheral_space`` tool and run it.

    Options come from ``sys_argv[1:]`` (parsed with getopt). When exactly one
    positional argument is left over it is read as a config file, section
    ``build_peripheral_space``; command-line options are applied afterwards
    and therefore override the config values.

    Dispatches to ``build_space_batch`` when a core input directory is set,
    otherwise to ``build_space`` (which additionally requires a core file).

    Args:
        sys_argv: Full argument vector, program name first.
    """
    long_flags = [
        "help", "input=", "output=", "core=", "log=", "input_format=",
        "output_format=", "core_in_dir=", "core_filter=", "gz=",
    ]
    try:
        opts, argv = getopt.getopt(sys_argv[1:], "hi:o:c:l:", long_flags)
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    section = "build_peripheral_space"
    # NOTE(review): the default log name says "core" although this is the
    # peripheral-space tool -- possibly a copy/paste leftover; confirm.
    default_log = './build_core_space.log'

    in_file_prefix = out_dir = core_space_file = None
    in_format = out_format = core_in_dir = None
    core_filter = ""
    gz = "False"
    log_file = default_log

    if len(argv) == 1:
        config = ConfigParser()
        config.read(argv[0])

        def from_config(key, fallback):
            return utils.config_get(section, config, key, fallback)

        out_dir = from_config("output", None)
        in_file_prefix = from_config("input", None)
        core_space_file = from_config("core", None)
        core_in_dir = from_config("core_in_dir", None)
        core_filter = from_config("core_filter", "")
        log_file = from_config("log", default_log)
        in_format = from_config("input_format", None)
        out_format = from_config("output_format", None)
        gz = from_config("gz", gz)

    # Command-line values win over anything read from the config file.
    for opt, val in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif opt in ("-i", "--input"):
            in_file_prefix = val
        elif opt in ("-o", "--output"):
            out_dir = val
        elif opt in ("-c", "--core"):
            core_space_file = val
        elif opt in ("-l", "--log"):
            log_file = val
        elif opt == "--gz":
            gz = val
        elif opt == "--input_format":
            in_format = val
        elif opt == "--output_format":
            out_format = val
        elif opt == "--core_in_dir":
            core_in_dir = val
        elif opt == "--core_filter":
            core_filter = val
        else:
            usage(1)

    log_utils.config_logging(log_file)

    required = (
        (in_file_prefix, "Input file prefix required"),
        (out_dir, "Output directory required"),
        (in_format, "Input file format required"),
    )
    for value, message in required:
        utils.assert_option_not_none(value, message, usage)

    # NOTE(review): eval() executes whatever text was supplied for --gz / the
    # "gz" config key; acceptable only while the caller is trusted.
    gz = eval(gz)
    utils.assert_bool(gz, "--gz value must be True/False", usage)

    if core_in_dir is None:
        utils.assert_option_not_none(core_space_file, "Input file required",
                                     usage)
        build_space(in_file_prefix, in_format, out_dir, out_format,
                    core_space_file, gz)
    else:
        build_space_batch(in_file_prefix, in_format, out_dir, out_format,
                          core_in_dir, core_filter, gz)
def main(sys_argv):
    """Parse options for the ``evaluate_similarities`` tool and run it.

    Options come from ``sys_argv[1:]``; a single leftover positional argument
    is read as a config file (section ``evaluate_similarities``) whose values
    are overridden by command-line options.

    Dispatches to ``evaluate_sim_batch`` when an input directory is set,
    otherwise to ``evaluate_sim`` (which requires an input file).

    Args:
        sys_argv: Full argument vector, program name first.

    Raises:
        ValueError: If the columns option does not hold exactly two
            comma-separated fields.
    """
    long_flags = [
        "help", "input=", "correlation_measure=", "columns=", "log=",
        "in_dir=", "filter=",
    ]
    try:
        opts, argv = getopt.getopt(sys_argv[1:], "hi:m:c:l:", long_flags)
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    section = "evaluate_similarities"

    in_file = in_dir = log_file = None
    corr_measures = columns = None
    filter_ = ""

    if len(argv) == 1:
        config = ConfigParser()
        config.read(argv[0])

        def from_config(key, fallback):
            return utils.config_get(section, config, key, fallback)

        def csv_or_none(text):
            # Comma-separated config value -> list; a missing value stays None.
            return None if text is None else text.split(",")

        in_file = from_config("input", None)
        in_dir = from_config("in_dir", None)
        filter_ = from_config("filter", filter_)
        corr_measures = csv_or_none(from_config("correlation_measure", None))
        columns = csv_or_none(from_config("columns", None))
        log_file = from_config("log", None)

    # Command-line values win over anything read from the config file.
    for opt, val in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif opt in ("-i", "--input"):
            in_file = val
        elif opt in ("-m", "--correlation_measure"):
            corr_measures = val.split(",")
        elif opt in ("-c", "--columns"):
            columns = val.split(",")
        elif opt in ("-l", "--log"):
            log_file = val
        elif opt == "--in_dir":
            in_dir = val
        elif opt == "--filter":
            filter_ = val
        else:
            usage(1)

    log_utils.config_logging(log_file)

    utils.assert_option_not_none(corr_measures,
                                 "Correlation measures required", usage)
    utils.assert_option_not_none(
        columns, "Columns to be read from input file required", usage)

    if len(columns) != 2:
        raise ValueError(
            "Columns (-c) field should contain two comma-separated integers (e.g. -c 3,4)"
        )

    if in_dir is None:
        utils.assert_option_not_none(in_file, "Input file required", usage)
        evaluate_sim(in_file, columns, corr_measures)
    else:
        evaluate_sim_batch(in_dir, columns, corr_measures, filter_)
def main(sys_argv):
    """Parse options for the ``apply_composition`` tool and run ``apply_model``.

    Options come from ``sys_argv[1:]``; a single leftover positional argument
    is read as a config file (section ``apply_composition``) whose values are
    overridden by command-line options.

    Exactly one of the model name (``-m``) and a saved model file
    (``--load_model``) must be supplied; this is enforced through
    ``utils.assert_xor_options``. ``alpha``, ``beta`` and ``lambda`` are
    converted to ``float`` when given.

    Args:
        sys_argv: Full argument vector, program name first.

    Raises:
        ValueError: If alpha, beta or lambda is given but is not a valid
            float literal.
    """
    try:
        opts, argv = getopt.getopt(sys_argv[1:], "hi:o:m:a:l:", [
            "help", "input=", "output=", "model=", "alpha=", "beta=",
            "lambda=", "arg_space=", "load_model=", "output_format=", "log="
        ])

    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    out_dir = None
    in_file = None
    model = None
    arg_space = None
    trained_model = None
    alpha = None
    beta = None
    lambda_ = None
    log_file = None
    out_format = None

    section = "apply_composition"

    if len(argv) == 1:
        config_file = argv[0]
        config = ConfigParser()
        config.read(config_file)
        out_dir = utils.config_get(section, config, "output", None)
        in_file = utils.config_get(section, config, "input", None)
        model = utils.config_get(section, config, "model", None)
        trained_model = utils.config_get(section, config, "load_model", None)
        arg_space = utils.config_get(section, config, "arg_space", None)
        if arg_space is not None:
            arg_space = arg_space.split(",")
        alpha = utils.config_get(section, config, "alpha", None)
        beta = utils.config_get(section, config, "beta", None)
        lambda_ = utils.config_get(section, config, "lambda", None)
        log_file = utils.config_get(section, config, "log", None)
        out_format = utils.config_get(section, config, "output_format", None)

    # Command-line values win over anything read from the config file.
    # (A leftover debugging print of the parsed options used to sit here; it
    # was removed so the tool no longer writes them to stdout on every run.)
    for opt, val in opts:
        if opt in ("-i", "--input"):
            in_file = val
        elif opt in ("-o", "--output"):
            out_dir = val
        elif opt in ("-m", "--model"):
            model = val
        elif opt in ("-a", "--arg_space"):
            arg_space = val.split(",")
        elif opt == "--load_model":
            trained_model = val
        elif opt == "--alpha":
            alpha = val
        elif opt == "--beta":
            beta = val
        elif opt == "--lambda":
            lambda_ = val
        elif opt == "--output_format":
            out_format = val
        elif opt in ("-l", "--log"):
            log_file = val
        elif opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        else:
            usage(1)

    log_utils.config_logging(log_file)

    utils.assert_option_not_none(in_file, "Input file required", usage)
    utils.assert_option_not_none(out_dir, "Output directory required", usage)
    utils.assert_xor_options(
        model, trained_model,
        "(Only) one of model name (-m) or file of model object (--load_model) are required!",
        usage)
    utils.assert_option_not_none(arg_space,
                                 "Argument space(s) file(s) required", usage)

    # Numeric parameters arrive as strings; convert only the ones supplied.
    if alpha is not None:
        alpha = float(alpha)
    if beta is not None:
        beta = float(beta)
    if lambda_ is not None:
        lambda_ = float(lambda_)

    apply_model(in_file, out_dir, model, trained_model, arg_space, alpha, beta,
                lambda_, out_format)
示例#6
0
def main(sys_argv):
    """Parse options for the ``compute_similarities`` tool and run it.

    Options come from ``sys_argv[1:]``; a single leftover positional argument
    is read as a config file (section ``compute_similarities``) whose values
    are overridden by command-line options.

    Dispatches to ``compute_sim_batch`` when an input directory is set,
    otherwise to ``compute_sim`` (which requires at least one space file).

    Args:
        sys_argv: Full argument vector, program name first.
    """
    long_flags = [
        "help", "input=", "output=", "sim_measures=", "space=", "in_dir=",
        "columns=", "log=",
    ]
    try:
        opts, argv = getopt.getopt(sys_argv[1:], "hi:o:s:m:c:l:", long_flags)
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    section = "compute_similarities"

    in_file = out_dir = in_dir = log_file = None
    sim_measures = spaces = columns = None

    if len(argv) == 1:
        config = ConfigParser()
        config.read(argv[0])

        def from_config(key):
            return utils.config_get(section, config, key, None)

        def csv_or_none(text):
            # Comma-separated config value -> list; a missing value stays None.
            return None if text is None else text.split(",")

        out_dir = from_config("output")
        in_file = from_config("input")
        in_dir = from_config("in_dir")
        sim_measures = csv_or_none(from_config("sim_measures"))
        spaces = csv_or_none(from_config("space"))
        columns = csv_or_none(from_config("columns"))
        log_file = from_config("log")

    # Command-line values win over anything read from the config file.
    for opt, val in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif opt in ("-i", "--input"):
            in_file = val
        elif opt in ("-o", "--output"):
            out_dir = val
        elif opt in ("-m", "--sim_measures"):
            sim_measures = val.split(",")
        elif opt in ("-s", "--space"):
            spaces = val.split(",")
        elif opt in ("-c", "--columns"):
            columns = val.split(",")
        elif opt in ("-l", "--log"):
            log_file = val
        elif opt == "--in_dir":
            in_dir = val
        else:
            usage(1)

    log_utils.config_logging(log_file)

    required = (
        (in_file, "Input file required"),
        (out_dir, "Output directory required"),
        (sim_measures, "Similarity measures required"),
        (columns, "Columns to be read from input file required"),
    )
    for value, message in required:
        utils.assert_option_not_none(value, message, usage)

    if in_dir is None:
        utils.assert_option_not_none(spaces, "Semantic space file required",
                                     usage)
        compute_sim(in_file, columns, out_dir, sim_measures, spaces)
    else:
        compute_sim_batch(in_file, columns, out_dir, sim_measures, in_dir)
示例#7
0
def main(sys_argv):
    """Parse options for the ``compute_neighbours`` tool and run it.

    Options come from ``sys_argv[1:]``; a single leftover positional argument
    is read as a config file (section ``compute_neighbours``) whose values are
    overridden by command-line options. The number of neighbours defaults to
    20 and is converted to ``int`` before ``compute_neighbours`` is called.

    Args:
        sys_argv: Full argument vector, program name first.

    Raises:
        IOError/OSError: If the config file cannot be opened.
        ValueError: If the number of neighbours is not an integer literal.
    """
    try:
        opts, argv = getopt.getopt(sys_argv[1:], "hi:o:s:m:n:l:", [
            "help", "input=", "output=", "sim_measures=", "space=", "log=",
            "no_neighbours="
        ])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    section = "compute_neighbours"

    out_dir = None
    in_file = None
    sim_measure = None
    spaces = None
    log_file = None
    no_neighbours = "20"

    if len(argv) == 1:
        config_file = argv[0]
        # Open the file up front so an unreadable config path raises here
        # instead of being skipped by the parser.
        with open(config_file):
            pass
        config = ConfigParser()
        config.read(config_file)
        out_dir = utils.config_get(section, config, "output", None)
        in_file = utils.config_get(section, config, "input", None)
        sim_measure = utils.config_get(section, config, "sim_measure", None)
        spaces = utils.config_get(section, config, "space", None)
        if spaces is not None:
            spaces = spaces.split(",")
        no_neighbours = utils.config_get(section, config, "no_neighbours",
                                         no_neighbours)
        log_file = utils.config_get(section, config, "log", None)

    # Command-line values win over anything read from the config file.
    for opt, val in opts:
        if opt in ("-i", "--input"):
            in_file = val
        elif opt in ("-o", "--output"):
            out_dir = val
        # getopt reports the canonical declared name, "--sim_measures", even
        # when the user types the abbreviation "--sim_measure". Matching only
        # the singular spelling sent every long-form use to usage(1).
        elif opt in ("-m", "--sim_measure", "--sim_measures"):
            sim_measure = val
        elif opt in ("-s", "--space"):
            spaces = val.split(",")
        elif opt in ("-n", "--no_neighbours"):
            no_neighbours = val
        elif opt in ("-l", "--log"):
            log_file = val
        elif opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        else:
            usage(1)

    log_utils.config_logging(log_file)

    no_neighbours = int(no_neighbours)

    utils.assert_option_not_none(in_file, "Input file required", usage)
    utils.assert_option_not_none(out_dir, "Output directory required", usage)
    utils.assert_option_not_none(sim_measure, "Similarity measure required",
                                 usage)
    utils.assert_option_not_none(spaces, "Semantic space file required", usage)

    compute_neighbours(in_file, no_neighbours, out_dir, sim_measure, spaces)
示例#8
0
def main(sys_argv):
    """Parse options for the ``train_composition`` tool and run ``train_model``.

    Options come from ``sys_argv[1:]``; a single leftover positional argument
    is read as a config file (section ``train_composition``) whose values are
    overridden by command-line options.

    ``crossvalidation``, ``intercept`` and ``export_params`` are supplied as
    text and must evaluate to booleans. ``lambda`` and ``lambda_range`` are
    converted to floats. Ridge regression without cross-validation requires
    a lambda value.

    Args:
        sys_argv: Full argument vector, program name first.

    Raises:
        ValueError: If lambda or an entry of lambda_range is not a valid
            float literal.
    """
    try:
        opts, argv = getopt.getopt(sys_argv[1:], "hi:o:m:r:a:p:l:",
                                   ["help", "input=", "output=", "model=",
                                    "regression=", "intercept=", "arg_space=",
                                    "phrase_space=", "export_params=", "log=",
                                    "crossvalidation=", "lambda_range=", "lambda="])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    out_dir = None
    in_file = None
    model = None
    regression = None
    crossvalidation = "False"
    intercept = "True"
    param_range = None
    arg_space = None
    phrase_space = None
    export_params = "False"
    log_file = None
    param = None

    section = "train_composition"

    if len(argv) == 1:
        config_file = argv[0]
        config = ConfigParser()
        config.read(config_file)
        out_dir = utils.config_get(section, config, "output", None)
        in_file = utils.config_get(section, config, "input", None)
        model = utils.config_get(section, config, "model", None)
        regression = utils.config_get(section, config, "regression", None)
        crossvalidation = utils.config_get(section, config, "crossvalidation", crossvalidation)
        intercept = utils.config_get(section, config, "intercept", intercept)
        param_range = utils.config_get(section, config, "lambda_range", None)
        if param_range is not None:
            param_range = param_range.split(",")
        param = utils.config_get(section, config, "lambda", None)
        arg_space = utils.config_get(section, config, "arg_space", None)
        if arg_space is not None:
            arg_space = arg_space.split(",")
        phrase_space = utils.config_get(section, config, "phrase_space", None)
        export_params = utils.config_get(section, config, "export_params", export_params)
        log_file = utils.config_get(section, config, "log", None)

    # Command-line values win over anything read from the config file.
    for opt, val in opts:
        if opt in ("-i", "--input"):
            in_file = val
        elif opt in ("-o", "--output"):
            out_dir = val
        elif opt in ("-m", "--model"):
            model = val
        elif opt in ("-a", "--arg_space"):
            arg_space = val.split(",")
        elif opt in ("-p", "--phrase_space"):
            phrase_space = val
        elif opt in ("-r", "--regression"):
            regression = val
        elif opt == "--crossvalidation":
            crossvalidation = val
        elif opt == "--intercept":
            intercept = val
        elif opt == "--lambda":
            param = val
        elif opt == "--lambda_range":
            param_range = val.split(",")
        elif opt == "--export_params":
            export_params = val
        elif opt in ("-l", "--log"):
            log_file = val
        elif opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        else:
            usage(1)

    log_utils.config_logging(log_file)

    utils.assert_option_not_none(in_file, "Input file required", usage)
    utils.assert_option_not_none(out_dir, "Output directory required", usage)
    utils.assert_option_not_none(model, "Model to be trained required", usage)
    utils.assert_option_not_none(arg_space, "Argument space(s) file(s) required", usage)
    utils.assert_option_not_none(phrase_space, "Phrase space file required", usage)

    # NOTE(review): eval() executes whatever text was supplied for these
    # options / config keys; acceptable only while the caller is trusted.
    crossvalidation = eval(crossvalidation)
    intercept = eval(intercept)
    utils.assert_bool(intercept, "intercept must be True/False", usage)
    utils.assert_bool(crossvalidation, "crossvalidation must be True/False", usage)

    export_params = eval(export_params)
    utils.assert_bool(export_params, "export_params must be True/False", usage)
    if param is not None:
        param = float(param)
    if param_range is not None:
        # The loop variable must not be called `param`: under Python 2 a list
        # comprehension leaks its variable and would overwrite the lambda
        # value parsed just above with the last range entry.
        param_range = [float(range_value) for range_value in param_range]

    if not crossvalidation and regression == "ridge":
        utils.assert_option_not_none(param, "Cannot run (no-crossvalidation) RidgeRegression with no lambda value!", usage)

    train_model(in_file, out_dir, model, arg_space, phrase_space, regression,
                crossvalidation, intercept, param, param_range, export_params)
def main(sys_argv):
    """Parse options for the ``build_core_space`` tool and run ``build_spaces``.

    Options come from ``sys_argv[1:]``; a single leftover positional argument
    is read as a config file (section ``build_core_space``) whose values are
    overridden by command-line options. Weighting, selection, reduction and
    normalization each default to ``[None]`` and otherwise hold the
    comma-separated values supplied.

    Args:
        sys_argv: Full argument vector, program name first.

    Raises:
        IOError/OSError: If the config file cannot be opened.
    """
    long_flags = [
        "help", "input=", "output=", "weighting=", "selection=",
        "reduction=", "normalization=", "log=", "gz=", "input_format=",
        "output_format=",
    ]
    try:
        opts, argv = getopt.getopt(sys_argv[1:], "hi:o:w:s:r:n:l:",
                                   long_flags)
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    section = "build_core_space"

    in_file_prefix = out_dir = log_file = None
    in_format = out_format = None
    weightings = [None]
    selections = [None]
    reductions = [None]
    normalizations = [None]
    gz = "False"

    if len(argv) == 1:
        config_file = argv[0]
        # Open the file up front so an unreadable config path raises here.
        with open(config_file):
            pass
        config = ConfigParser()
        config.read(config_file)

        def from_config(key, fallback):
            return utils.config_get(section, config, key, fallback)

        def operation_list(key):
            # The [None] default is passed through untouched; a configured
            # value is split on commas.
            raw = from_config(key, [None])
            return raw if raw == [None] else raw.split(",")

        out_dir = from_config("output", None)
        in_file_prefix = from_config("input", None)
        weightings = operation_list("weighting")
        selections = operation_list("selection")
        reductions = operation_list("reduction")
        normalizations = operation_list("normalization")
        log_file = from_config("log", None)
        in_format = from_config("input_format", None)
        out_format = from_config("output_format", None)
        gz = from_config("gz", gz)

    # Command-line values win over anything read from the config file.
    for opt, val in opts:
        if opt in ("-h", "--help"):
            usage(0)
        elif opt in ("-i", "--input"):
            in_file_prefix = val
        elif opt in ("-o", "--output"):
            out_dir = val
        elif opt in ("-w", "--weighting"):
            weightings = val.split(",")
        elif opt in ("-s", "--selection"):
            selections = val.split(",")
        elif opt in ("-r", "--reduction"):
            reductions = val.split(",")
        elif opt in ("-n", "--normalization"):
            normalizations = val.split(",")
        elif opt in ("-l", "--log"):
            log_file = val
        elif opt == "--gz":
            gz = val
        elif opt == "--input_format":
            in_format = val
        elif opt == "--output_format":
            out_format = val
        else:
            usage(1)

    # Logging is only configured when a log file was actually given.
    if log_file is not None:
        log_utils.config_logging(log_file)

    required = (
        (in_file_prefix, "Input file prefix required"),
        (out_dir, "Output directory required"),
        (in_format, "Input format required"),
    )
    for value, message in required:
        utils.assert_option_not_none(value, message, usage)

    # NOTE(review): eval() executes whatever text was supplied for --gz / the
    # "gz" config key; acceptable only while the caller is trusted.
    gz = eval(gz)

    utils.assert_bool(gz, "--gz value must be True/False", usage)

    build_spaces(in_file_prefix, in_format, out_dir, out_format, weightings,
                 selections, reductions, normalizations, gz)