Пример #1
0
def main(argv):
    """Run the top3_full GMM experiment sweep and save its results.

    This variant runs the aggregator in its "ground-truth empirical limit"
    configuration: ``rankings=None`` and the dataset's true parameters are
    handed to the aggregator, so the sampled votes are loaded but not used.

    Expected ``argv`` layout (12 entries in total):
        argv[1]   number of alternatives (m)
        argv[2]   number of dataset files to read (t)
        argv[3]   first number of votes (n_init, must be > 0)
        argv[4]   last number of votes (n_stop, must be > n_init)
        argv[5]   step between vote counts (n_step, must be > 0)
        argv[6]   dataset base filename ending in "_<number>", without ".csv"
        argv[7]   output CSV filename: mean WSSE per vote count
        argv[8]   output CSV filename: mean SSE per vote count
        argv[9]   output CSV filename: mean runtimes per vote count
        argv[10]  output plot image filename
        argv[11]  output pickle filename for the GMMMixPLResult objects

    NOTE(review): ``print_usage`` is defined outside this block; the
    validation below assumes it terminates the program -- confirm.
    """
    if len(argv) != 12:
        print_usage(argv[0])
    m = int(argv[1])  # number of alternatives
    # (index + 1) of final dataset (number of datasets if base filename is 0...00)
    t = int(argv[2])
    n_init = int(argv[3])  # initial experiment number of votes
    if not n_init > 0:
        print("Error: Starting number of votes must be greater than 0")
        print_usage(argv[0])
    n_stop = int(argv[4])  # final experiment number of votes
    if not n_stop > n_init:
        print(
            "Error: Final number of votes must be greater than starting number of votes"
        )
        print_usage(argv[0])
    n_step = int(argv[5])  # number of votes to increment by each time
    if not n_step > 0:
        print("Error: Step number of votes must be greater than 0")
        print_usage(argv[0])
    elif (n_stop - n_init) < n_step or (n_stop - n_init) % n_step != 0:
        print("Warning: Step number of votes doesn't fit range")
    # number of vote counts visited by the sweep below (one result row each)
    p = ((n_stop - n_init) // n_step) + 1

    # read in all data required for experiments
    print("Reading Datasets from Disk...")
    datasets = []
    base_parts = argv[6].split("_")
    index_field = base_parts[-1]  # zero-padded index of the first dataset
    d = int(index_field)
    if d < 0:
        print(
            "Error: dataset base file name must not contain a negative number")
        print_usage(argv[0])
    index_width = len(index_field)
    data_filename_base = "_".join(base_parts[:-1])
    for i in range(t):
        # keep the zero padding of the index used in the base filename
        infilename = "{0}_{1:0{2}d}.csv".format(data_filename_base, i + d,
                                                index_width)
        # close each input file as soon as it has been parsed
        with open(infilename) as infile:
            datasets.append(pl.read_mix2pl_dataset(infile, numVotes=n_stop))

    # Check files can be written to later (opening for writing fails fast,
    # before any experiment time is spent):
    wsse_filename = argv[7]
    wsse_file = open(wsse_filename, 'w')
    sse_filename = argv[8]
    sse_file = open(sse_filename, 'w')
    time_filename = argv[9]
    time_file = open(time_filename, 'w')
    plot_filename = argv[10]
    plot_file = open(plot_filename, 'w')
    gmm_solns_filename = argv[11]
    gmm_solns_file = open(gmm_solns_filename, 'wb')  # writable binary mode

    # one row per vote count; column 0 holds n, the rest hold means
    wsse_res = np.empty((p, 2))
    sse_res = np.empty((p, 2))
    time_res = np.empty((p, 4))

    gmm_solns = []

    alts = np.arange(m)

    # initialize the aggregators for each class of algorithm
    print("Initializing Aggregator Classes...")
    gmmagg = gmm_mixpl.GMMMixPLAggregator(alts, use_matlab=True)

    print("Starting Experiments...")
    # k_n is the experiment index number, n the number of agents
    for k_n, n in enumerate(range(n_init, n_stop + 1, n_step)):
        print("n =", n)
        print("i =   ", end='')
        sys.stdout.flush()

        wsse_vals = np.empty((1, t))
        sse_vals = np.empty((1, t))
        time_vals = np.empty((3, t))

        for i in range(t):
            # overwrite the previously printed trial index in place
            print("\b" * len(str(i - 1)) + str(i), end='')
            sys.stdout.flush()

            # get data
            params, votes = datasets[i]
            # only consumed by the commented-out "rankings = votes_curr" mode
            votes_curr = votes[:n]

            # DEFAULT: top3_full GMM (20 moments)
            time_val = time.perf_counter()
            soln, t0, t1 = gmmagg.aggregate(  ##rankings = votes_curr,
                rankings=None,  # for ground-truth empirical limit
                algorithm="top3_full",
                epsilon=None,
                max_iters=None,
                approx_step=None,
                ##opto = "matlab_default",
                opto="matlab_emp_default",  # for ground-truth empirical limit
                ##true_params = None
                true_params=params  # for ground-truth empirical limit
            )
            time_val = time.perf_counter() - time_val
            wsse_val = stats.mix2PL_wsse(params, soln, m)
            sse_val = stats.mix2PL_sse(params, soln, m)
            wsse_vals[0][i] = wsse_val
            sse_vals[0][i] = sse_val
            time_vals[0][i] = t0
            time_vals[1][i] = t1
            time_vals[2][i] = time_val
            gmm_result = gmm_mixpl.GMMMixPLResult(
                num_alts=m,
                ##num_votes = n,
                num_votes=0,  # ground-truth empirical limit
                num_mix=2,
                true_params=params,
                cond="top3_full",
                ##opto = "matlab_default",
                opto="matlab_emp_default",  # ground-truth empirical limit
                soln_params=soln,
                momnts_runtime=t0,
                opto_runtime=t1,
                overall_runtime=time_val)
            gmm_solns.append(gmm_result)

        print()
        wsse_res[k_n][0] = n
        wsse_res[k_n][1] = np.mean(wsse_vals[0])  # GMM

        sse_res[k_n][0] = n
        sse_res[k_n][1] = np.mean(sse_vals[0])  # GMM

        time_res[k_n][0] = n
        time_res[k_n][1] = np.mean(time_vals[0])  # GMM t0 (moment-calc)
        time_res[k_n][2] = np.mean(time_vals[1])  # GMM t1 (optimization)
        time_res[k_n][3] = np.mean(time_vals[2])  # GMM overall time

        # write results intermediately after a full set of trials for each n
        # (each dump appends the whole list gathered so far to the file)
        pickle.dump(gmm_solns, gmm_solns_file)

    pickle.dump(gmm_solns, gmm_solns_file)
    gmm_solns_file.close()
    # savetxt reopens each file by name; the handles opened above only
    # served as the early writability check
    np.savetxt(wsse_filename, wsse_res, delimiter=',', newline="\r\n")
    wsse_file.close()
    np.savetxt(sse_filename, sse_res, delimiter=',', newline="\r\n")
    sse_file.close()
    np.savetxt(time_filename, time_res, delimiter=',', newline="\r\n")
    time_file.close()

    plot.plot_error_time_data(str_error_type="MSE",
                              error_results=sse_res,
                              time_results=time_res,
                              output_img_filename=plot_filename)
    plot_file.close()
Пример #2
0
def main(argv):
    """Run the EMM experiment sweep with a fixed iteration budget.

    For every vote count in the sweep and every dataset, the EMM aggregator
    is run on the first ``n`` votes, its WSSE/SSE against the true parameters
    and its wall-clock time are recorded, and per-vote-count means are saved.

    Expected ``argv`` layout (14 entries in total):
        argv[1]   number of alternatives (m)
        argv[2]   number of dataset files to read (t)
        argv[3]   first number of votes (n_init, must be > 0)
        argv[4]   last number of votes (n_stop, must be > n_init)
        argv[5]   step between vote counts (n_step, must be > 0)
        argv[6]   total iterations (tot_iters, must be >= 1)
        argv[7]   EM iterations (em_iters, 1 <= em_iters <= tot_iters)
        argv[8]   dataset base filename ending in "_<number>", without ".csv"
        argv[9]   output CSV filename: mean WSSE per vote count
        argv[10]  output CSV filename: mean SSE per vote count
        argv[11]  output CSV filename: mean runtime per vote count
        argv[12]  output plot image filename
        argv[13]  output pickle filename for the EMMMixPLResult objects

    NOTE(review): ``print_usage`` is defined outside this block; the
    validation below assumes it terminates the program -- confirm.
    """
    if len(argv) != 14:
        print_usage(argv[0])
    m = int(argv[1]) # number of alternatives
    t = int(argv[2]) # (index + 1) of final dataset (number of datasets if base filename is 0...00)
    n_init = int(argv[3]) # initial experiment number of votes
    if not n_init > 0:
        print("Error: Starting number of votes must be greater than 0")
        print_usage(argv[0])
    n_stop = int(argv[4]) # final experiment number of votes
    if not n_stop > n_init:
        print("Error: Final number of votes must be greater than starting number of votes")
        print_usage(argv[0])
    n_step = int(argv[5]) # number of votes to increment by each time
    if not n_step > 0:
        print("Error: Step number of votes must be greater than 0")
        print_usage(argv[0])
    elif (n_stop - n_init) < n_step or (n_stop - n_init) % n_step != 0:
        print("Warning: Step number of votes doesn't fit range")
    # number of vote counts visited by the sweep below (one result row each)
    p = ((n_stop - n_init) // n_step) + 1

    # Set EMM algorithm iterations & convergence parameters:
    emm_epsilon = None # TODO: add option to specify on command-line
    mm_epsilon = None # TODO: add option to specify on command-line
    tot_iters = int(argv[6])
    if tot_iters < 1:
        print("Error: Invalid argument for total sum of iterations")
        print_usage(argv[0])
    em_iters = int(argv[7])
    if em_iters < 1:
        print("Error: Invalid argument for EM iterations")
        print_usage(argv[0])
    if em_iters > tot_iters:
        print("Error: Total iterations less than EM iterations")
        print_usage(argv[0])

    # read in all data required for experiments
    print("Reading Datasets from Disk...")
    datasets = []
    base_parts = argv[8].split("_")
    index_field = base_parts[-1] # zero-padded index of the first dataset
    d = int(index_field)
    if d < 0:
        print("Error: dataset base file name must not contain a negative number")
        print_usage(argv[0])
    index_width = len(index_field)
    data_filename_base = "_".join(base_parts[:-1])
    for i in range(t):
        # keep the zero padding of the index used in the base filename
        infilename = "{0}_{1:0{2}d}.csv".format(data_filename_base, i + d, index_width)
        # close each input file as soon as it has been parsed
        with open(infilename) as infile:
            datasets.append(pl.read_mix2pl_dataset(infile, numVotes=n_stop))

    # Check files can be written to later (opening for writing fails fast,
    # before any experiment time is spent):
    wsse_filename = argv[9]
    wsse_file = open(wsse_filename, 'w')
    sse_filename = argv[10]
    sse_file = open(sse_filename, 'w')
    time_filename = argv[11]
    time_file = open(time_filename, 'w')
    plot_filename = argv[12]
    plot_file = open(plot_filename, 'w')
    emm_solns_filename = argv[13]
    emm_solns_file = open(emm_solns_filename, 'wb') # writable binary mode

    # open previous experiments results files
    # NOTE(review): these paths are hard-coded relative to the working directory
    orig_error_results = np.loadtxt("../../MixPL_GmmExperiments/mse_mixPL_04-alts_2000-trials_22.csv", delimiter=',')
    orig_time_results = np.loadtxt("../../MixPL_GmmExperiments/time_mixPL_04-alts_2000-trials_22.csv", delimiter=',')

    # one row per vote count; column 0 holds n, column 1 the mean
    wsse_res = np.empty((p, 2))
    sse_res = np.empty((p, 2))
    time_res = np.empty((p, 2))

    emm_solns = []

    alts = np.arange(m)

    # initialize the aggregators for each class of algorithm
    print("Initializing Aggregator Classes...")
    emmagg = emm.EMMMixPLAggregator(alts)

    print("Starting Experiments...")
    # k_n is the experiment index number, n the number of agents
    for k_n, n in enumerate(range(n_init, n_stop + 1, n_step)):
        print("n =", n)
        print("i =   ", end='')
        sys.stdout.flush()

        wsse_vals = np.empty((1,t))
        sse_vals = np.empty((1,t))
        time_vals = np.empty((1,t))

        for i in range(t):
            # overwrite the previously printed trial index in place
            print("\b"*len(str(i-1)) + str(i), end='')
            sys.stdout.flush()

            # get data
            params, votes = datasets[i]
            votes_curr = votes[:n]

            # EMM
            time_val = time.perf_counter()
            emm_pi, emm_p, pi_0, p_0 = emmagg.aggregate(votes_curr,
                                                        K=2,
                                                        epsilon=emm_epsilon,
                                                        tot_iters=tot_iters,
                                                        epsilon_mm=mm_epsilon,
                                                        max_iters_em=em_iters
                                                       )
            time_val = time.perf_counter() - time_val
            # flatten the estimate into one vector for the error metrics
            soln = np.hstack((emm_pi[0], emm_p[0], emm_p[1]))
            wsse_val = stats.mix2PL_wsse(params, soln, m)
            sse_val = stats.mix2PL_sse(params, soln, m)
            wsse_vals[0][i] = wsse_val
            sse_vals[0][i] = sse_val
            time_vals[0][i] = time_val
            emm_result = emm.EMMMixPLResult(num_alts = m,
                                            num_votes = n,
                                            num_mix = 2,
                                            true_params = params,
                                            epsilon = emm_epsilon,
                                            max_iters = "total:" + str(tot_iters),
                                            epsilon_mm = mm_epsilon,
                                            max_iters_mm = "actually-EM:" + str(em_iters),
                                            init_guess = np.hstack((
                                                pi_0[0],
                                                p_0[0],
                                                p_0[1]
                                            )),
                                            soln_params = soln,
                                            runtime = time_val
                                           )
            emm_solns.append(emm_result)

        print()
        wsse_res[k_n][0] = n
        wsse_res[k_n][1] = np.mean(wsse_vals[0]) # EMM

        sse_res[k_n][0] = n
        sse_res[k_n][1] = np.mean(sse_vals[0]) # EMM

        time_res[k_n][0] = n
        time_res[k_n][1] = np.mean(time_vals[0]) # EMM

        # write results intermediately after a full set of trials for each n
        # (each dump appends the whole list gathered so far to the file)
        pickle.dump(emm_solns, emm_solns_file)

    pickle.dump(emm_solns, emm_solns_file)
    emm_solns_file.close()
    # savetxt reopens each file by name; the handles opened above only
    # served as the early writability check
    np.savetxt(wsse_filename, wsse_res, delimiter=',', newline="\r\n")
    wsse_file.close()
    np.savetxt(sse_filename, sse_res, delimiter=',', newline="\r\n")
    sse_file.close()
    np.savetxt(time_filename, time_res, delimiter=',', newline="\r\n")
    time_file.close()

    plot.plot_error_time_data(str_error_type="MSE",
                              iter_1=em_iters,
                              iter_2=tot_iters // em_iters,
                              error_results=sse_res,
                              time_results=time_res,
                              orig_error_results=orig_error_results,
                              orig_time_results=orig_time_results,
                              output_img_filename=plot_filename
                             )
    plot_file.close()
Пример #3
0
def main(argv):
    """Run the top3_full GMM experiment sweep and save its results.

    The aggregator is called in its "ground-truth empirical limit" setup
    (``rankings = None`` with the true parameters supplied), so the votes
    read from disk are sliced but never passed to the aggregator.

    Expected ``argv`` layout (12 entries in total):
        argv[1]   number of alternatives (m)
        argv[2]   number of dataset files to read (t)
        argv[3]   first number of votes (n_init, must be > 0)
        argv[4]   last number of votes (n_stop, must be > n_init)
        argv[5]   step between vote counts (n_step, must be > 0)
        argv[6]   dataset base filename ending in "_<number>", without ".csv"
        argv[7]   output CSV filename: mean WSSE per vote count
        argv[8]   output CSV filename: mean SSE per vote count
        argv[9]   output CSV filename: mean runtimes per vote count
        argv[10]  output plot image filename
        argv[11]  output pickle filename for the GMMMixPLResult objects

    NOTE(review): ``print_usage`` is defined outside this block; the
    validation below assumes it terminates the program -- confirm.
    """
    if len(argv) != 12:
        print_usage(argv[0])
    m = int(argv[1]) # number of alternatives
    t = int(argv[2]) # (index + 1) of final dataset (number of datasets if base filename is 0...00)
    n_init = int(argv[3]) # initial experiment number of votes
    if not n_init > 0:
        print("Error: Starting number of votes must be greater than 0")
        print_usage(argv[0])
    n_stop = int(argv[4]) # final experiment number of votes
    if not n_stop > n_init:
        print("Error: Final number of votes must be greater than starting number of votes")
        print_usage(argv[0])
    n_step = int(argv[5]) # number of votes to increment by each time
    if not n_step > 0:
        print("Error: Step number of votes must be greater than 0")
        print_usage(argv[0])
    elif (n_stop - n_init) < n_step or (n_stop - n_init) % n_step != 0:
        print("Warning: Step number of votes doesn't fit range")
    # number of vote counts visited by the sweep below (one result row each)
    p = ((n_stop - n_init) // n_step) + 1

    # read in all data required for experiments
    print("Reading Datasets from Disk...")
    datasets = []
    base_parts = argv[6].split("_")
    index_field = base_parts[-1] # zero-padded index of the first dataset
    d = int(index_field)
    if d < 0:
        print("Error: dataset base file name must not contain a negative number")
        print_usage(argv[0])
    index_width = len(index_field)
    data_filename_base = "_".join(base_parts[:-1])
    for i in range(t):
        # keep the zero padding of the index used in the base filename
        infilename = "{0}_{1:0{2}d}.csv".format(data_filename_base, i + d, index_width)
        # close each input file as soon as it has been parsed
        with open(infilename) as infile:
            datasets.append(pl.read_mix2pl_dataset(infile, numVotes=n_stop))

    # Check files can be written to later (opening for writing fails fast,
    # before any experiment time is spent):
    wsse_filename = argv[7]
    wsse_file = open(wsse_filename, 'w')
    sse_filename = argv[8]
    sse_file = open(sse_filename, 'w')
    time_filename = argv[9]
    time_file = open(time_filename, 'w')
    plot_filename = argv[10]
    plot_file = open(plot_filename, 'w')
    gmm_solns_filename = argv[11]
    gmm_solns_file = open(gmm_solns_filename, 'wb') # writable binary mode

    # one row per vote count; column 0 holds n, the rest hold means
    wsse_res = np.empty((p, 2))
    sse_res = np.empty((p, 2))
    time_res = np.empty((p, 4))

    gmm_solns = []

    alts = np.arange(m)

    # initialize the aggregators for each class of algorithm
    print("Initializing Aggregator Classes...")
    gmmagg = gmm_mixpl.GMMMixPLAggregator(alts, use_matlab=True)

    print("Starting Experiments...")
    # k_n is the experiment index number, n the number of agents
    for k_n, n in enumerate(range(n_init, n_stop + 1, n_step)):
        print("n =", n)
        print("i =   ", end='')
        sys.stdout.flush()

        wsse_vals = np.empty((1,t))
        sse_vals = np.empty((1,t))
        time_vals = np.empty((3,t))

        for i in range(t):
            # overwrite the previously printed trial index in place
            print("\b"*len(str(i-1)) + str(i), end='')
            sys.stdout.flush()

            # get data
            params, votes = datasets[i]
            # only consumed by the commented-out "rankings = votes_curr" mode
            votes_curr = votes[:n]

            # DEFAULT: top3_full GMM (20 moments)
            time_val = time.perf_counter()
            soln, t0, t1 = gmmagg.aggregate(##rankings = votes_curr,
                                            rankings = None, # for ground-truth empirical limit
                                            algorithm = "top3_full",
                                            epsilon = None,
                                            max_iters = None,
                                            approx_step = None,
                                            ##opto = "matlab_default",
                                            opto = "matlab_emp_default", # for ground-truth empirical limit
                                            ##true_params = None
                                            true_params = params # for ground-truth empirical limit
                                           )
            time_val = time.perf_counter() - time_val
            wsse_val = stats.mix2PL_wsse(params, soln, m)
            sse_val = stats.mix2PL_sse(params, soln, m)
            wsse_vals[0][i] = wsse_val
            sse_vals[0][i] = sse_val
            time_vals[0][i] = t0
            time_vals[1][i] = t1
            time_vals[2][i] = time_val
            gmm_result = gmm_mixpl.GMMMixPLResult(num_alts = m,
                                                  ##num_votes = n,
                                                  num_votes = 0, # ground-truth empirical limit
                                                  num_mix = 2,
                                                  true_params = params,
                                                  cond = "top3_full",
                                                  ##opto = "matlab_default",
                                                  opto = "matlab_emp_default",  # ground-truth empirical limit
                                                  soln_params = soln,
                                                  momnts_runtime = t0,
                                                  opto_runtime = t1,
                                                  overall_runtime = time_val
                                                 )
            gmm_solns.append(gmm_result)

        print()
        wsse_res[k_n][0] = n
        wsse_res[k_n][1] = np.mean(wsse_vals[0]) # GMM

        sse_res[k_n][0] = n
        sse_res[k_n][1] = np.mean(sse_vals[0]) # GMM

        time_res[k_n][0] = n
        time_res[k_n][1] = np.mean(time_vals[0]) # GMM t0 (moment-calc)
        time_res[k_n][2] = np.mean(time_vals[1]) # GMM t1 (optimization)
        time_res[k_n][3] = np.mean(time_vals[2]) # GMM overall time

        # write results intermediately after a full set of trials for each n
        # (each dump appends the whole list gathered so far to the file)
        pickle.dump(gmm_solns, gmm_solns_file)

    pickle.dump(gmm_solns, gmm_solns_file)
    gmm_solns_file.close()
    # savetxt reopens each file by name; the handles opened above only
    # served as the early writability check
    np.savetxt(wsse_filename, wsse_res, delimiter=',', newline="\r\n")
    wsse_file.close()
    np.savetxt(sse_filename, sse_res, delimiter=',', newline="\r\n")
    sse_file.close()
    np.savetxt(time_filename, time_res, delimiter=',', newline="\r\n")
    time_file.close()

    plot.plot_error_time_data(str_error_type="MSE",
                              error_results=sse_res,
                              time_results=time_res,
                              output_img_filename=plot_filename
                             )
    plot_file.close()
Пример #4
0
def main(argv):
    """Run the GMM-vs-EMM comparison sweep and save errors, times and diffs.

    For every vote count in the sweep and every dataset, both the top3_full
    GMM aggregator and the EMM aggregator are run on the first ``n`` votes.
    Mean WSSE/SSE/runtime per vote count are saved, together with the mean
    and standard deviation of the paired (EMM - GMM) error differences.

    Expected ``argv`` layout (17 entries in total):
        argv[1]   number of alternatives (m)
        argv[2]   number of dataset files to read (t)
        argv[3]   first number of votes (n_init, must be > 0)
        argv[4]   last number of votes (n_stop, must be > n_init)
        argv[5]   step between vote counts (n_step, must be > 0)
        argv[6]   EMM iterations; a value < 1 selects epsilon 1e-8 with a
                  500-iteration cap instead
        argv[7]   MM iterations; a value < 1 selects epsilon 1e-8 with a
                  50-iteration cap instead
        argv[8]   dataset base filename ending in "_<number>", without ".csv"
        argv[9]   output CSV filename: mean WSSE per vote count
        argv[10]  output CSV filename: mean SSE per vote count
        argv[11]  output CSV filename: mean runtimes per vote count
        argv[12]  output plot image filename
        argv[13]  output pickle filename for the GMMMixPLResult objects
        argv[14]  output pickle filename for the EMMMixPLResult objects
        argv[15]  output CSV filename: WSSE difference statistics
        argv[16]  output CSV filename: SSE difference statistics

    NOTE(review): ``print_usage`` is defined outside this block; the
    validation below assumes it terminates the program -- confirm.
    """
    # argv[0] plus 16 arguments: the highest index read below is argv[16]
    if len(argv) != 17:
        print_usage(argv[0])
    m = int(argv[1]) # number of alternatives
    t = int(argv[2]) # (index + 1) of final dataset (number of datasets if base filename is 0...00)
    n_init = int(argv[3]) # initial experiment number of votes
    if not n_init > 0:
        print("Error: Starting number of votes must be greater than 0")
        print_usage(argv[0])
    n_stop = int(argv[4]) # final experiment number of votes
    if not n_stop > n_init:
        print("Error: Final number of votes must be greater than starting number of votes")
        print_usage(argv[0])
    n_step = int(argv[5]) # number of votes to increment by each time
    if not n_step > 0:
        print("Error: Step number of votes must be greater than 0")
        print_usage(argv[0])
    elif (n_stop - n_init) < n_step or (n_stop - n_init) % n_step != 0:
        print("Warning: Step number of votes doesn't fit range")
    # number of vote counts visited by the sweep below (one result row each)
    p = ((n_stop - n_init) // n_step) + 1

    # Set EMM algorithm iterations & convergence parameters:
    emm_epsilon = None # TODO: add option to specify on command-line
    mm_epsilon = None # TODO: add option to specify on command-line
    emm_iters = int(argv[6])
    if emm_iters < 1:
        # no explicit iteration count: use an epsilon with an iteration cap
        emm_epsilon = 1e-8
        emm_iters = 500
    mm_iters = int(argv[7])
    if mm_iters < 1:
        mm_epsilon = 1e-8
        mm_iters = 50

    # read in all data required for experiments
    print("Reading Datasets from Disk...")
    datasets = []
    base_parts = argv[8].split("_")
    index_field = base_parts[-1] # zero-padded index of the first dataset
    d = int(index_field)
    if d < 0:
        print("Error: dataset base file name must not contain a negative number")
        print_usage(argv[0])
    index_width = len(index_field)
    data_filename_base = "_".join(base_parts[:-1])
    for i in range(t):
        # keep the zero padding of the index used in the base filename
        infilename = "{0}_{1:0{2}d}.csv".format(data_filename_base, i + d, index_width)
        # close each input file as soon as it has been parsed
        with open(infilename) as infile:
            datasets.append(pl.read_mix2pl_dataset(infile, numVotes=n_stop))

    # Check files can be written to later (opening for writing fails fast,
    # before any experiment time is spent):
    wsse_filename = argv[9]
    wsse_file = open(wsse_filename, 'w')
    sse_filename = argv[10]
    sse_file = open(sse_filename, 'w')
    time_filename = argv[11]
    time_file = open(time_filename, 'w')
    plot_filename = argv[12]
    plot_file = open(plot_filename, 'w')
    gmm_solns_filename = argv[13]
    gmm_solns_file = open(gmm_solns_filename, 'wb') # writable binary mode
    emm_solns_filename = argv[14]
    emm_solns_file = open(emm_solns_filename, 'wb') # writable binary mode
    ttest_wsse_filename = argv[15]
    ttest_wsse_file = open(ttest_wsse_filename, 'w')
    ttest_sse_filename = argv[16]
    ttest_sse_file = open(ttest_sse_filename, 'w')

    # one row per vote count; column 0 holds n, the rest hold means
    wsse_res = np.empty((p, 3))
    sse_res = np.empty((p, 3))
    time_res = np.empty((p, 5))
    ttest_vals = np.empty((2,p,3)) # 2 t-tests X p points X 3 values (n, mean, std)

    gmm_solns = []
    emm_solns = []

    alts = np.arange(m)

    # initialize the aggregators for each class of algorithm
    print("Initializing Aggregator Classes...")
    gmmagg = gmm_mixpl.GMMMixPLAggregator(alts, use_matlab=True)
    emmagg = emm.EMMMixPLAggregator(alts)

    print("Starting Experiments...")
    # k_n is the experiment index number, n the number of agents
    for k_n, n in enumerate(range(n_init, n_stop + 1, n_step)):
        print("n =", n)
        print("i =   ", end='')
        sys.stdout.flush()

        wsse_vals = np.empty((2,t)) # row 0: GMM, row 1: EMM
        sse_vals = np.empty((2,t)) # row 0: GMM, row 1: EMM
        time_vals = np.empty((4,t)) # rows 0-2: GMM t0/t1/overall, row 3: EMM
        diff_vals = np.empty((2,t)) # row 0: WSSE diffs, row 1: SSE diffs

        for i in range(t):
            # overwrite the previously printed trial index in place
            print("\b"*len(str(i-1)) + str(i), end='')
            sys.stdout.flush()

            # get data
            params, votes = datasets[i]
            votes_curr = votes[:n]

            # top3_full GMM (20 moments)
            time_val = time.perf_counter()
            soln, t0, t1 = gmmagg.aggregate(rankings = votes_curr,
                                            ##rankings = None, # for ground-truth empirical limit
                                            algorithm = "top3_full",
                                            epsilon = None,
                                            max_iters = None,
                                            approx_step = None,
                                            opto = "matlab",
                                            ##opto = "matlab_emp", # for ground-truth empirical limit
                                            true_params = None
                                            ##true_params = params # for ground-truth empirical limit
                                           )
            time_val = time.perf_counter() - time_val
            wsse_val = stats.mix2PL_wsse(params, soln, m)
            sse_val = stats.mix2PL_sse(params, soln, m)
            wsse_vals[0][i] = wsse_val
            sse_vals[0][i] = sse_val
            time_vals[0][i] = t0
            time_vals[1][i] = t1
            time_vals[2][i] = time_val
            gmm_result = gmm_mixpl.GMMMixPLResult(num_alts = m,
                                                  num_votes = n,
                                                  ##num_votes = 0, # ground-truth empirical limit
                                                  num_mix = 2,
                                                  true_params = params,
                                                  cond = "top3_full",
                                                  opto = "matlab",
                                                  ##opto = "matlab_emp",  # ground-truth empirical limit
                                                  soln_params = soln,
                                                  momnts_runtime = t0,
                                                  opto_runtime = t1,
                                                  overall_runtime = time_val
                                                 )
            gmm_solns.append(gmm_result)

            # EMM
            time_val = time.perf_counter()
            # the MM step gets its own epsilon (mm_epsilon), matching mm_iters
            emm_pi, emm_p, pi_0, p_0 = emmagg.aggregate(votes_curr,
                                                        K=2,
                                                        epsilon=emm_epsilon,
                                                        max_iters=emm_iters,
                                                        epsilon_mm=mm_epsilon,
                                                        max_iters_mm=mm_iters
                                                       )
            time_val = time.perf_counter() - time_val
            # flatten the estimate into one vector for the error metrics
            soln = np.hstack((emm_pi[0], emm_p[0], emm_p[1]))
            wsse_val = stats.mix2PL_wsse(params, soln, m)
            sse_val = stats.mix2PL_sse(params, soln, m)
            wsse_vals[1][i] = wsse_val
            sse_vals[1][i] = sse_val
            time_vals[3][i] = time_val
            emm_result = emm.EMMMixPLResult(num_alts = m,
                                            num_votes = n,
                                            num_mix = 2,
                                            true_params = params,
                                            epsilon = emm_epsilon,
                                            max_iters = emm_iters,
                                            epsilon_mm = mm_epsilon,
                                            max_iters_mm = mm_iters,
                                            init_guess = np.hstack((
                                                pi_0[0],
                                                p_0[0],
                                                p_0[1]
                                            )),
                                            soln_params = soln,
                                            runtime = time_val
                                           )
            emm_solns.append(emm_result)

            # t-test differences (EMM error minus GMM error for this trial)
            diff_vals[0][i] = wsse_vals[1][i] - wsse_vals[0][i]
            diff_vals[1][i] = sse_vals[1][i] - sse_vals[0][i]

        print()
        wsse_res[k_n][0] = n
        wsse_res[k_n][1] = np.mean(wsse_vals[0]) # GMM
        wsse_res[k_n][2] = np.mean(wsse_vals[1]) # EMM

        sse_res[k_n][0] = n
        sse_res[k_n][1] = np.mean(sse_vals[0]) # GMM
        sse_res[k_n][2] = np.mean(sse_vals[1]) # EMM

        time_res[k_n][0] = n
        time_res[k_n][1] = np.mean(time_vals[0]) # GMM t0 (moment-calc)
        time_res[k_n][2] = np.mean(time_vals[1]) # GMM t1 (optimization)
        time_res[k_n][3] = np.mean(time_vals[2]) # GMM overall time
        time_res[k_n][4] = np.mean(time_vals[3]) # EMM time

        # GMM vs EMM WSSE
        ttest_vals[0][k_n][0] = n
        ttest_vals[0][k_n][1] = np.mean(diff_vals[0])
        ttest_vals[0][k_n][2] = np.std(diff_vals[0])

        # GMM vs EMM SSE
        ttest_vals[1][k_n][0] = n
        ttest_vals[1][k_n][1] = np.mean(diff_vals[1])
        ttest_vals[1][k_n][2] = np.std(diff_vals[1])

        # write results intermediately after a full set of trials for each n
        # (each dump appends the whole list gathered so far to the file)
        pickle.dump(gmm_solns, gmm_solns_file)
        pickle.dump(emm_solns, emm_solns_file)

    pickle.dump(gmm_solns, gmm_solns_file)
    gmm_solns_file.close()
    pickle.dump(emm_solns, emm_solns_file)
    emm_solns_file.close()
    # savetxt reopens each file by name; the handles opened above only
    # served as the early writability check
    np.savetxt(wsse_filename, wsse_res, delimiter=',', newline="\r\n")
    wsse_file.close()
    np.savetxt(sse_filename, sse_res, delimiter=',', newline="\r\n")
    sse_file.close()
    np.savetxt(time_filename, time_res, delimiter=',', newline="\r\n")
    time_file.close()
    np.savetxt(ttest_wsse_filename, ttest_vals[0], delimiter=',', newline="\r\n")
    ttest_wsse_file.close()
    np.savetxt(ttest_sse_filename, ttest_vals[1], delimiter=',', newline="\r\n")
    ttest_sse_file.close()

    plot.plot_wsse_time_data(str_error_type="MSE",
                             error_results=wsse_res,
                             time_results=time_res,
                             output_img_filename=plot_filename
                            )
    plot_file.close()
Пример #5
0
def main(argv):
    """Run the EMM mixture-of-2-PL experiments over a range of vote counts.

    For every vote count ``n`` in ``range(n_init, n_stop + 1, n_step)`` and
    for each of the ``t`` datasets, the first ``n`` votes are aggregated with
    ``emm.EMMMixPLAggregator`` (``K=2``).  The WSSE, SSE and wall-clock
    runtime of each solution are recorded; their per-``n`` means are written
    to CSV files, every individual result object is pickled, and a plot
    comparing against previously saved results is produced.

    Args:
        argv: Command-line argument list with exactly 14 entries:
            argv[1]   number of alternatives (int)
            argv[2]   (index + 1) of the final dataset (int)
            argv[3]   initial number of votes, must be > 0 (int)
            argv[4]   final number of votes, must be > argv[3] (int)
            argv[5]   vote-count step, must be > 0 (int)
            argv[6]   total iterations, must be >= 1 (int)
            argv[7]   EM iterations, must be >= 1 and <= argv[6] (int)
            argv[8]   dataset base file name ending in "_<number>"
            argv[9]   output CSV file for mean WSSE
            argv[10]  output CSV file for mean SSE
            argv[11]  output CSV file for mean runtime
            argv[12]  output file name passed to the plotting routine
            argv[13]  output pickle file for the EMM result objects

    Raises:
        ValueError: If a numeric argument cannot be parsed.
        OSError: If a dataset, a previous-results file or an output file
            cannot be opened.

    NOTE(review): ``print_usage`` is defined outside this block; the
    validation below only makes sense if it terminates the program -- confirm.
    """
    if len(argv) != 14:
        print_usage(argv[0])
    m = int(argv[1])  # number of alternatives
    t = int(
        argv[2]
    )  # (index + 1) of final dataset (number of datasets if base filename is 0...00)
    n_init = int(argv[3])  # initial experiment number of votes
    if not n_init > 0:
        print("Error: Starting number of votes must be greater than 0")
        print_usage(argv[0])
    n_stop = int(argv[4])  # final experiment number of votes
    if not n_stop > n_init:
        print(
            "Error: Final number of votes must be greater than starting number of votes"
        )
        print_usage(argv[0])
    n_step = int(argv[5])  # number of votes to increment by each time
    if not n_step > 0:
        print("Error: Step number of votes must be greater than 0")
        print_usage(argv[0])
    elif (n_stop - n_init) < n_step or (n_stop - n_init) % n_step != 0:
        print("Warning: Step number of votes doesn't fit range")
    p = ((n_stop - n_init) // n_step) + 1  # always positive and >= 1 by above

    # Set EMM algorithm iterations & convergence parameters:
    emm_epsilon = None  # TODO: add option to specify on command-line
    mm_epsilon = None  # TODO: add option to specify on command-line
    tot_iters = int(argv[6])
    if tot_iters < 1:
        print("Error: Invalid argument for total sum of iterations")
        print_usage(argv[0])
    em_iters = int(argv[7])
    if em_iters < 1:
        print("Error: Invalid argument for EM iterations")
        print_usage(argv[0])
    if em_iters > tot_iters:
        print("Error: Total iterations less than EM iterations")
        print_usage(argv[0])

    # read in all data required for experiments
    print("Reading Datasets from Disk...")
    datasets = []
    data_filename_base = argv[8]
    index_suffix = data_filename_base.split("_")[-1]
    d = int(index_suffix)  # index of the first dataset file
    if d < 0:
        print(
            "Error: dataset base file name must not contain a negative number")
        print_usage(argv[0])
    # Width of the numeric suffix, reused to zero-pad every generated index.
    len_d = str(len(index_suffix))
    data_filename_base = "_".join(data_filename_base.split("_")[:-1])
    for i in range(t):
        infilename = data_filename_base + '_' + ("{0:0" + len_d +
                                                 "d}").format(i + d) + ".csv"
        # Close each dataset file as soon as it has been parsed instead of
        # leaking one open handle per dataset.
        # NOTE(review): assumes read_mix2pl_dataset reads the file eagerly;
        # the votes are sliced later, which suggests a materialized sequence.
        with open(infilename) as infile:
            datasets.append(pl.read_mix2pl_dataset(infile, numVotes=n_stop))

    # Check files can be written to later.  The handles are closed right
    # away: np.savetxt and the plotting call below reopen the paths by name.
    wsse_filename = argv[9]
    sse_filename = argv[10]
    time_filename = argv[11]
    plot_filename = argv[12]
    for out_filename in (wsse_filename, sse_filename, time_filename,
                         plot_filename):
        with open(out_filename, 'w'):
            pass
    emm_solns_filename = argv[13]

    # The pickle file is written to throughout the run, so keep it open for
    # the whole experiment and let the context manager close it on any exit.
    with open(emm_solns_filename, 'wb') as emm_solns_file:
        # open previous experiments results files
        orig_error_results = np.loadtxt(
            "../../MixPL_GmmExperiments/mse_mixPL_04-alts_2000-trials_22.csv",
            delimiter=',')
        orig_time_results = np.loadtxt(
            "../../MixPL_GmmExperiments/time_mixPL_04-alts_2000-trials_22.csv",
            delimiter=',')

        # One row per vote count: column 0 is n, column 1 the mean statistic.
        wsse_res = np.empty((p, 2))
        sse_res = np.empty((p, 2))
        time_res = np.empty((p, 2))

        emm_solns = []

        alts = np.arange(m)

        # initialize the aggregators for each class of algorithm
        print("Initializing Aggregator Classes...")
        emmagg = emm.EMMMixPLAggregator(alts)

        print("Starting Experiments...")
        k_n = 0  # experiment index number
        for n in range(n_init, n_stop + 1, n_step):  # for these numbers of agents
            print("n =", n)
            print("i =   ", end='')
            sys.stdout.flush()

            wsse_vals = np.empty((1, t))
            sse_vals = np.empty((1, t))
            time_vals = np.empty((1, t))

            for i in range(t):
                # Overwrite the previously printed trial index in place.
                print("\b" * len(str(i - 1)) + str(i), end='')
                sys.stdout.flush()

                # get data
                params, votes = datasets[i]
                votes_curr = votes[:n]

                # EMM
                time_val = time.perf_counter()
                emm_pi, emm_p, pi_0, p_0 = emmagg.aggregate(
                    votes_curr,
                    K=2,
                    epsilon=emm_epsilon,
                    tot_iters=tot_iters,
                    epsilon_mm=mm_epsilon,
                    max_iters_em=em_iters)
                time_val = time.perf_counter() - time_val
                soln = np.hstack((emm_pi[0], emm_p[0], emm_p[1]))
                wsse_val = stats.mix2PL_wsse(params, soln, m)
                sse_val = stats.mix2PL_sse(params, soln, m)
                wsse_vals[0][i] = wsse_val
                sse_vals[0][i] = sse_val
                time_vals[0][i] = time_val
                emm_result = emm.EMMMixPLResult(
                    num_alts=m,
                    num_votes=n,
                    num_mix=2,
                    true_params=params,
                    epsilon=emm_epsilon,
                    max_iters="total:" + str(tot_iters),
                    epsilon_mm=mm_epsilon,
                    max_iters_mm="actually-EM:" + str(em_iters),
                    init_guess=np.hstack((pi_0[0], p_0[0], p_0[1])),
                    soln_params=soln,
                    runtime=time_val)
                emm_solns.append(emm_result)

            print()
            wsse_res[k_n][0] = n
            wsse_res[k_n][1] = np.mean(wsse_vals[0])  # EMM

            sse_res[k_n][0] = n
            sse_res[k_n][1] = np.mean(sse_vals[0])  # EMM

            time_res[k_n][0] = n
            time_res[k_n][1] = np.mean(time_vals[0])  # EMM

            # write results intermediately after a full set of trials for
            # each n; every dump appends the whole cumulative list, so the
            # last object in the file is the most complete one
            pickle.dump(emm_solns, emm_solns_file)

            k_n += 1

        pickle.dump(emm_solns, emm_solns_file)

    np.savetxt(wsse_filename, wsse_res, delimiter=',', newline="\r\n")
    np.savetxt(sse_filename, sse_res, delimiter=',', newline="\r\n")
    np.savetxt(time_filename, time_res, delimiter=',', newline="\r\n")

    plot.plot_error_time_data(str_error_type="MSE",
                              iter_1=em_iters,
                              iter_2=tot_iters // em_iters,
                              error_results=sse_res,
                              time_results=time_res,
                              orig_error_results=orig_error_results,
                              orig_time_results=orig_time_results,
                              output_img_filename=plot_filename)
Пример #6
0
def main(argv):
    """Sweep the alpha parameter and record GMM estimation error statistics.

    For each of ``argv[4]`` alpha values (``a_init``, ``2 * a_init``, ...),
    ``argv[2]`` ground-truth parameter vectors are drawn and handed to the
    GMM aggregator; the mean and standard deviation of the WSSE and SSE of
    the returned solutions are saved as CSV and passed to the plot routine.

    Args:
        argv: Command-line argument list with at least 8 entries:
            argv[1]  number of alternatives (int)
            argv[2]  number of trials per alpha value (int)
            argv[3]  starting and stepping value for alpha (float)
            argv[4]  number of alpha values to evaluate (int)
            argv[5]  output CSV file for the WSSE statistics
            argv[6]  output CSV file for the SSE statistics
            argv[7]  output file name passed to the plotting routine
            argv[8]  optional "-U" flag (parsed, but not read afterwards)
    """
    if len(argv) < 8:
        print_usage(argv[0])
    num_alts = int(argv[1])
    num_trials = int(argv[2])
    alpha_step = float(argv[3])
    num_points = int(argv[4])

    # Check files can be written to later: create/truncate each one in turn.
    out_paths = []
    for arg_pos in (5, 6, 7):
        out_path = argv[arg_pos]
        with open(out_path, 'w'):
            pass
        out_paths.append(out_path)
    wsse_filename, sse_filename, plot_filename = out_paths

    # Generate data sets with Dirichlet by default ("-U" switches it off).
    # NOTE(review): this flag is never consulted below.
    use_dirichlet = not (len(argv) > 8 and argv[8] == "-U")

    # 2 statistics X num_points points X 3 observations (alpha, mean, std)
    results = np.empty((2, num_points, 3))

    # initialize the aggregators for each class of algorithm
    gmmagg = gmm_mixpl.GMMMixPLAggregator(np.arange(num_alts),
                                          use_matlab=True)

    failure_count = 0
    for n in range(num_points):
        alpha = alpha_step * (n + 1)
        # Reseed so every alpha value starts from the same random stream.
        np.random.seed(0)
        print("n =", n)
        print("i =   ", end='')
        sys.stdout.flush()
        wsse_vals = np.empty(num_trials)
        sse_vals = np.empty(num_trials)
        for i in range(num_trials):
            # Overwrite the previously printed trial index in place.
            print("\b" * len(str(i - 1)) + str(i), end='')
            sys.stdout.flush()

            # Keep drawing fresh ground truths until the optimizer succeeds.
            solved = False
            while not solved:
                try:
                    # generate ground-truths
                    gamma1 = np.random.dirichlet(np.ones(num_alts))
                    gamma2 = np.random.dirichlet(np.ones(num_alts))
                    params = np.hstack((alpha, gamma1, gamma2))

                    # MatLab top3_full GMM (20 moments)
                    soln = gmmagg.aggregate(rankings=None,
                                            algorithm="top3_full",
                                            epsilon=None,
                                            max_iters=None,
                                            approx_step=None,
                                            opto="matlab3",
                                            true_params=params)
                    wsse_vals[i] = stats.mix2PL_wsse(params, soln, num_alts)
                    sse_vals[i] = stats.mix2PL_sse(params, soln, num_alts)
                except (FloatingPointError, ValueError):
                    # bad data (objective function NaN, or votes arrays
                    # number of dimensions mismatch): count it and retry
                    failure_count += 1
                else:
                    solved = True  # good data!

        print()
        # store WSSE values
        results[0, n] = (alpha, np.mean(wsse_vals), np.std(wsse_vals))
        # store SSE values
        results[1, n] = (alpha, np.mean(sse_vals), np.std(sse_vals))

    print("Optimization Failures Count: " + str(failure_count))

    np.savetxt(wsse_filename, results[0], delimiter=',', newline="\r\n")
    np.savetxt(sse_filename, results[1], delimiter=',', newline="\r\n")
    plot.plot_wsse_sse_data(results[0], results[1], plot_filename)
def main(argv):
    """Run the GMM experiment for increasing alpha and save error summaries.

    Alpha takes the values ``a_init * 1, a_init * 2, ..., a_init * p``.  For
    each value, ``t`` random ground truths are generated and solved with the
    GMM aggregator; the mean/std of the WSSE and SSE are stored, written to
    CSV files and handed to the plotting routine.

    Args:
        argv: Command-line argument list with at least 8 entries:
            argv[1]  number of alternatives (int)
            argv[2]  number of trials (int)
            argv[3]  starting and stepping value for alpha (float)
            argv[4]  number of times to increment by the starting value (int)
            argv[5]  WSSE output CSV file name
            argv[6]  SSE output CSV file name
            argv[7]  plot output file name
            argv[8]  optional "-U" flag (parsed, but not read afterwards)
    """
    if len(argv) < 8:
        print_usage(argv[0])

    alt_count = int(argv[1])
    trial_count = int(argv[2])
    alpha_unit = float(argv[3])
    point_count = int(argv[4])

    # Check files can be written to later:
    wsse_filename = argv[5]
    open(wsse_filename, 'w').close()
    sse_filename = argv[6]
    open(sse_filename, 'w').close()
    plot_filename = argv[7]
    open(plot_filename, 'w').close()

    # Generate data sets with Dirichlet by default:
    # NOTE(review): the flag is set here but not used anywhere below.
    dirichlet_enabled = True
    optional_args = argv[8:]
    if optional_args and optional_args[0] == "-U":
        dirichlet_enabled = False

    # 2 statistics X point_count points X 3 observations
    results = np.empty((2, point_count, 3))
    wsse_table = results[0]  # views into `results`, one per statistic
    sse_table = results[1]

    alts = np.arange(alt_count)

    # initialize the aggregators for each class of algorithm
    gmmagg = gmm_mixpl.GMMMixPLAggregator(alts, use_matlab=True)

    opto_fails = 0
    for multiple in range(1, point_count + 1):
        n = multiple - 1
        alpha = alpha_unit * multiple
        np.random.seed(0)
        print("n =", n)
        print("i =   ", end='')
        sys.stdout.flush()

        wsse_samples = np.empty(trial_count)
        sse_samples = np.empty(trial_count)
        for i in range(trial_count):
            progress = "\b" * len(str(i - 1)) + str(i)
            print(progress, end='')
            sys.stdout.flush()

            while True:
                try:
                    # generate ground-truths
                    gammas = [np.random.dirichlet(np.ones(alt_count))
                              for _ in range(2)]
                    params = np.hstack((alpha, gammas[0], gammas[1]))

                    # MatLab top3_full GMM (20 moments)
                    soln = gmmagg.aggregate(
                        rankings=None,
                        algorithm="top3_full",
                        epsilon=None,
                        max_iters=None,
                        approx_step=None,
                        opto="matlab3",
                        true_params=params,
                    )
                    wsse_val = stats.mix2PL_wsse(params, soln, alt_count)
                    sse_val = stats.mix2PL_sse(params, soln, alt_count)
                    wsse_samples[i] = wsse_val
                    sse_samples[i] = sse_val
                except (FloatingPointError, ValueError):
                    # bad data: objective function NaN, or votes arrays
                    # number of dimensions mismatch -> retry this trial
                    opto_fails += 1
                    continue
                break  # good data!

        print()
        # store WSSE values
        wsse_table[n, 0] = alpha
        wsse_table[n, 1] = np.mean(wsse_samples)
        wsse_table[n, 2] = np.std(wsse_samples)

        # store SSE values
        sse_table[n, 0] = alpha
        sse_table[n, 1] = np.mean(sse_samples)
        sse_table[n, 2] = np.std(sse_samples)

    print("Optimization Failures Count: " + str(opto_fails))

    np.savetxt(wsse_filename, wsse_table, delimiter=',', newline="\r\n")
    np.savetxt(sse_filename, sse_table, delimiter=',', newline="\r\n")
    plot.plot_wsse_sse_data(wsse_table, sse_table, plot_filename)