Example No. 1
def main(argv):
    num_threads = 1
    num_runs = 30

    try:
        opts, args = getopt.getopt(argv,"f:z:a:b:c:s:m:r:t:")
    except getopt.GetoptError:
        print "Bad Arguments to python script"
        sys.exit(2)

    settings = Elastic_Net_Settings()
    for opt, arg in opts:
        if opt == '-f':
            settings.num_features = int(arg)
        elif opt == '-z':
            settings.num_nonzero_features = int(arg)
        elif opt == '-a':
            settings.train_size = int(arg)
        elif opt == '-b':
            settings.validate_size = int(arg)
        elif opt == '-c':
            settings.test_size = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            assert(arg in METHODS)
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)

    settings.print_settings()
    sys.stdout.flush()
    data_gen = DataGenerator(settings)

    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.make_correlated(settings.num_features, settings.num_nonzero_features)
        run_data.append(Iteration_Data(i, observed_data, settings))

    if settings.method not in ["SP", "SP0"] and num_threads > 1:
        print "Do multiprocessing"
        pool = Pool(num_threads)
        results = pool.map(fit_data_for_iter_safe, run_data)
    else:
        print "Avoiding multiprocessing"
        results = map(fit_data_for_iter_safe, run_data)

    method_results = MethodResults(settings.method, settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1
    print "==========TOTAL RUNS %d============" % method_results.get_num_runs()
    method_results.print_results()
    print "num crashes %d" % num_crashes
Example No. 2
def main(argv):
    seed = 10
    print "seed", seed
    np.random.seed(seed)

    num_threads = 1
    num_runs = 1

    try:
        opts, args = getopt.getopt(argv, "m:t:r:")
    except getopt.GetoptError:
        print "Bad argument given to realdata_eval.py"
        sys.exit(2)

    settings = RealDataSettings()
    for opt, arg in opts:
        if opt == "-m":
            assert (arg in ["HC", "GS"])
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)

    print "TOTAL NUM RUNS %d" % num_runs
    sys.stdout.flush()

    geneset_dict = read_geneset_file()
    X_genesets, y, genesets = read_gene_expr_data(geneset_dict)
    print "num features", sum(
        [X_genesets[i].shape[1] for i in range(0, len(X_genesets))])
    print "total genesets ever", len(X_genesets)
    X_genesets = normalize_data(X_genesets)

    run_data = []
    for i in range(num_runs):
        data = Shuffled_Gene_Data(X_genesets, y, genesets)
        run_data.append(Iteration_Data(i, data, settings))

    if num_threads > 1:
        print "Do multiprocessing"
        pool = Pool(num_threads)
        results = pool.map(fit_data_for_iter_safe, run_data)
    else:
        print "Avoiding multiprocessing"
        results = map(fit_data_for_iter_safe, run_data)

    method_results = MethodResults(settings.method,
                                   settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1
    print "==========TOTAL RUNS %d============" % method_results.get_num_runs()
    method_results.print_results()
    print "num crashes %d" % num_crashes
Example No. 3
def main(argv):
    seed = 10
    print "seed", seed
    np.random.seed(seed)

    num_threads = 1
    num_runs = 1

    try:
        opts, args = getopt.getopt(argv, "g:f:a:b:c:s:m:t:r:i")
    except getopt.GetoptError:
        print "Bad argument given to sgl_eval.py"
        sys.exit(2)

    settings = SGL_Settings()
    for opt, arg in opts:
        if opt == '-g':
            settings.expert_num_groups = int(arg)
        elif opt == '-f':
            settings.num_features = int(arg)
        elif opt == '-a':
            settings.train_size = int(arg)
        elif opt == '-b':
            settings.validate_size = int(arg)
        elif opt == '-c':
            settings.test_size = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            assert (arg in METHODS)
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)

    print "TOTAL NUM RUNS %d" % num_runs
    settings.print_settings()
    sys.stdout.flush()

    data_gen = DataGenerator(settings)

    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.sparse_groups()
        run_data.append(Iteration_Data(i, observed_data, settings))

    if settings.method != "SP" and num_threads > 1:
        print "Do multiprocessing"
        pool = Pool(num_threads)
        results = pool.map(fit_data_for_iter_safe, run_data)
    else:
        print "Avoiding multiprocessing"
        results = map(fit_data_for_iter_safe, run_data)

    method_results = MethodResults(settings.method,
                                   settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1
    print "==========TOTAL RUNS %d============" % method_results.get_num_runs()
    method_results.print_results()
    print "num crashes %d" % num_crashes
Example No. 4
def main(argv):
    seed = 10
    print "seed", seed
    np.random.seed(seed)

    num_threads = 1
    num_runs = 1

    try:
        opts, args = getopt.getopt(argv, "d:z:f:a:v:s:m:t:r:i")
    except getopt.GetoptError:
        print "Bad argument given to Matrix_Completion_eval.py"
        sys.exit(2)

    settings = Matrix_Completion_Settings()
    for opt, arg in opts:
        if opt == '-d':
            arg_split = arg.split(",")
            settings.num_rows = int(arg_split[0])
            settings.num_cols = int(arg_split[1])
        elif opt == '-z':
            arg_split = arg.split(",")
            settings.num_nonzero_row_features = int(arg_split[0])
            settings.num_nonzero_col_features = int(arg_split[1])
        elif opt == '-f':
            arg_split = arg.split(",")
            settings.num_row_features = int(arg_split[0])
            settings.num_col_features = int(arg_split[1])
        elif opt == '-a':
            arg_split = arg.split(",")
            settings.train_perc = float(arg_split[0])
            settings.validate_perc = float(arg_split[1])
            settings.test_perc = float(arg_split[2])
            assert (settings.train_perc + settings.validate_perc +
                    settings.test_perc < 1)
        elif opt == "-v":
            settings.num_nonzero_s = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            assert (arg in METHODS)
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)
        elif opt == "-i":
            settings.big_init_set = True

    assert (settings.num_nonzero_s <= settings.num_rows
            and settings.num_nonzero_s <= settings.num_cols)
    # SP does not care about initialization
    assert not (settings.big_init_set and settings.method in ["SP", "SP0"])

    settings.matrix_size = settings.num_rows * settings.num_cols
    settings.train_size = int(settings.train_perc * settings.matrix_size)
    settings.validate_size = int(settings.validate_perc * settings.matrix_size)
    settings.test_size = int(settings.test_perc * settings.matrix_size)

    print "TOTAL NUM RUNS %d" % num_runs
    settings.print_settings()
    sys.stdout.flush()

    data_gen = DataGenerator(settings)

    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.matrix_completion()
        run_data.append(Iteration_Data(i, observed_data, settings))

    if settings.method != "SP" and num_threads > 1:
        print "Do multiprocessing"
        pool = Pool(num_threads)
        results = pool.map(fit_data_for_iter_safe, run_data)
    else:
        print "Avoiding multiprocessing"
        results = map(fit_data_for_iter_safe, run_data)

    method_results = MethodResults(settings.method,
                                   settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1
    print "==========TOTAL RUNS %d============" % method_results.get_num_runs()
    method_results.print_results()
    print "num crashes %d" % num_crashes
Example No. 5
def main(argv):
    num_threads = 1
    num_runs = 1

    try:
        opts, args = getopt.getopt(argv, "d:z:f:g:a:v:s:m:t:r:i:")
    except getopt.GetoptError:
        print "Bad argument given"
        sys.exit(2)

    settings = Matrix_Completion_Group_Settings()
    for opt, arg in opts:
        if opt == '-d':
            arg_split = arg.split(",")
            settings.num_rows = int(arg_split[0])
            settings.num_cols = int(arg_split[1])
        elif opt == '-z':
            arg_split = arg.split(",")
            settings.num_nonzero_row_groups = int(arg_split[0])
            settings.num_nonzero_col_groups = int(arg_split[1])
        elif opt == '-f':
            arg_split = arg.split(",")
            settings.num_row_features = int(arg_split[0])
            settings.num_col_features = int(arg_split[1])
        elif opt == '-g':
            arg_split = arg.split(",")
            settings.num_row_groups = int(arg_split[0])
            settings.num_col_groups = int(arg_split[1])
        elif opt == '-a':
            arg_split = arg.split(",")
            settings.train_perc = float(arg_split[0])
            settings.validate_perc = float(arg_split[1])
            assert (settings.train_perc + settings.validate_perc <= 1.0)
        elif opt == "-v":
            settings.num_nonzero_s = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            assert (arg in METHODS)
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)
        elif opt == "-i":
            settings.gamma_to_row_col_m = float(arg)

    assert (settings.num_nonzero_s <= settings.num_rows
            and settings.num_nonzero_s <= settings.num_cols)

    settings.matrix_size = settings.num_rows * settings.num_cols

    print "TOTAL NUM RUNS %d" % num_runs
    settings.print_settings()
    sys.stdout.flush()

    data_gen = DataGenerator(settings)

    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.matrix_completion_groups(
            gamma_to_row_col_m=settings.gamma_to_row_col_m,
            feat_factor=settings.feat_factor)
        run_data.append(Iteration_Data(i, observed_data, settings))

    if settings.method != "SP" and num_threads > 1:
        print "Do multiprocessing"
        pool = Pool(num_threads)
        results = pool.map(fit_data_for_iter_safe, run_data)
    else:
        print "Avoiding multiprocessing"
        results = map(fit_data_for_iter_safe, run_data)

    method_results = MethodResults(settings.method,
                                   settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1
    print "==========TOTAL RUNS %d============" % method_results.get_num_runs()
    method_results.print_results()
    print "num crashes %d" % num_crashes
Example No. 6
def main(argv):
    num_threads = 1
    num_runs = 1

    try:
        opts, args = getopt.getopt(argv, "f:z:a:b:c:s:m:t:r:")
    except getopt.GetoptError:
        print "Bad argument given"
        sys.exit(2)

    settings = Sparse_Add_Models_Settings()
    for opt, arg in opts:
        if opt == '-f':
            settings.num_funcs = int(arg)
        elif opt == '-z':
            settings.num_zero_funcs = int(arg)
        elif opt == '-a':
            settings.train_size = int(arg)
        elif opt == '-b':
            settings.validate_size = int(arg)
        elif opt == '-c':
            settings.test_size = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            assert (arg in METHODS)
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)

    print "TOTAL NUM RUNS %d" % num_runs
    settings.print_settings()
    sys.stdout.flush()

    assert (settings.num_funcs <= len(settings.smooth_fcns))
    smooth_fcn_list = (settings.smooth_fcns[:settings.num_funcs] +
                       [const_zero] * settings.num_zero_funcs)
    data_gen = DataGenerator(settings)

    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.make_additive_smooth_data(smooth_fcn_list)
        run_data.append(Iteration_Data(i, observed_data, settings))

    if settings.method != "SP" and num_threads > 1:
        print "Do multiprocessing"
        pool = Pool(num_threads)
        results = pool.map(fit_data_for_iter_safe, run_data)
    else:
        print "Avoiding multiprocessing"
        results = map(fit_data_for_iter_safe, run_data)

    method_results = MethodResults(settings.method,
                                   settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1
    print "==========TOTAL RUNS %d============" % method_results.get_num_runs()
    method_results.print_results()
    print "num crashes %d" % num_crashes
Example No. 7
def main(argv):
    num_threads = 1
    seed = 20
    print "seed", seed
    np.random.seed(seed)

    try:
        opts, args = getopt.getopt(argv, "t:r:f:z:a:b:c:s:m:i:")
    except getopt.GetoptError:
        print "Bad argument given"
        sys.exit(2)

    settings = Sparse_Add_Models_Multiple_Starts_Settings()
    for opt, arg in opts:
        if opt == '-t':
            num_threads = int(arg)
        elif opt == '-f':
            settings.num_funcs = int(arg)
        elif opt == '-z':
            settings.num_zero_funcs = int(arg)
        elif opt == '-a':
            settings.train_size = int(arg)
        elif opt == '-b':
            settings.validate_size = int(arg)
        elif opt == '-c':
            settings.test_size = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            assert (arg in ["HC", "NM"])
            settings.method = arg
        elif opt == "-i":
            settings.init_size = int(arg)

    assert (settings.num_funcs <= len(settings.smooth_fcns))

    smooth_fcn_list = (settings.smooth_fcns[:settings.num_funcs] +
                       [const_zero] * settings.num_zero_funcs)
    data_gen = DataGenerator(settings)

    observed_data = data_gen.make_additive_smooth_data(smooth_fcn_list)

    # Create initial lambdas
    num_lambdas = 1 + settings.num_funcs + settings.num_zero_funcs
    # Alternative (disabled): seed with two fixed vectors, then draw the rest
    # of the initializations at random integer powers of ten.
    # initial_lambdas_set = [
    #     np.array([10] + [1] * (num_lambdas - 1)),
    #     np.array([0.1] + [0.01] * (num_lambdas - 1)),
    # ]
    # for i in range(settings.init_size - 2):
    #     init_l = np.power(10.0, np.random.randint(low=settings.min_init_log_lambda, high=settings.max_init_log_lambda, size=num_lambdas))

    # Build a grid of initializations: the first lambda takes value l1 and all
    # remaining lambdas share value l2; the grid is then visited in random order.
    initial_lambdas_set = []
    init_values = np.power(
        10.0,
        np.arange(settings.min_init_log_lambda,
                  settings.max_init_log_lambda + 1))
    for l1 in init_values:
        for l2 in init_values:
            full_init_l = np.array([l1] + [l2] * (num_lambdas - 1))
            initial_lambdas_set.append(full_init_l)
    permute_idxs = np.random.permutation(len(initial_lambdas_set))
    settings.init_size = permute_idxs.size

    settings.print_settings()
    sys.stdout.flush()

    run_data = []
    for i, idx in enumerate(permute_idxs):
        init_lambdas = initial_lambdas_set[idx]
        print "init_lambdas", init_lambdas
        run_data.append(
            Iteration_Data(i,
                           observed_data,
                           settings,
                           init_lambdas=[init_lambdas]))

    if num_threads > 1:
        print "Do multiprocessing"
        pool = Pool(num_threads)
        results = pool.map(fit_data_for_iter_safe, run_data)
    else:
        print "Avoiding multiprocessing"
        results = map(fit_data_for_iter_safe, run_data)

    print "results", results
    cum_results = CumulativeInitializationResults(observed_data, settings)
    for r in results:
        print r
        cum_results.update(r)

    print "==========RUNS============"
    print "cumulative_val_cost", cum_results.cumulative_val_cost
    print "cumulative_test_cost", cum_results.cumulative_test_cost

    pickle_file_name = "%s/tmp/%s_many_inits_%d_%d_%d_%d_%d_%d_%d.pkl" % (
        settings.results_folder,
        settings.method,
        settings.num_funcs,
        settings.num_zero_funcs,
        settings.train_size,
        settings.validate_size,
        settings.test_size,
        settings.snr,  # a float; %d truncates it in the file name
        settings.init_size,
    )
    print "pickle_file_name", pickle_file_name
    with open(pickle_file_name, "wb") as f:
        pickle.dump(
            {
                "initial_lambdas_set": initial_lambdas_set,
                "cum_results": cum_results,
            }, f)

    # plot_mult_inits(cum_results, str_identifer)
    print "DONE!"