# Example #1
0
def learning_rates(folder,
                   modification='',
                   create=False,
                   learning_rates=np.arange(5, 1000, 5)):
    """
    Generate or load t-SNE transformations for a range of learning rates.

    Parameters
    -------------
    folder: The name of the folder the pickles should be put in / are in
    modification: the modification done to the dataset, a string used in the
        name of the corresponding pickles.
    create: true if you want to create the transformations (they are then
        always pickled into folder), false if you want to load them
    learning_rates: the learning rates you want to create transformations of
        (ignored and replaced by the pickled values when create is false)

    Output
    -------------
    l_Z: A list of the t-SNE transformations
    learning_rates: a vector with the corresponding values of learning rate
    l_times: a vector with the corresponding values of computational time
        (seconds spent in fit_transform)
    l_kl_divergence: a vector with the corresponding values kl divergence
    l_differences: the result of HL.get_differences(X_2d, l_Z) (described by
        the original author as the difference in 2d distance)
    """

    def _pickle_path(name):
        # All pickles share the "<folder>/<name><modification>.pkl" layout.
        return folder + "/" + name + modification + ".pkl"

    def _load_pickle(name):
        # NOTE: unpickling can execute arbitrary code; only point `folder`
        # at data you trust.
        with open(_pickle_path(name), "rb") as handle:
            return pickle.load(handle)

    def _dump_pickle(obj, name):
        # The context manager guarantees the file is flushed and closed.
        with open(_pickle_path(name), "wb") as handle:
            pickle.dump(obj, handle)

    if create:
        X = _load_pickle("X")
        l_Z = []
        l_times = np.zeros(len(learning_rates))
        l_kl_divergence = np.zeros(len(learning_rates))
        for i, rate in enumerate(learning_rates):
            # Fixed random_state so runs differ only by the learning rate.
            tsne = t_sne(learning_rate=rate, random_state=123)
            start_time = time.time()
            l_Z.append(tsne.fit_transform(X))
            l_times[i] = time.time() - start_time
            l_kl_divergence[i] = tsne.kl_divergence_
        _dump_pickle(l_Z, "l_Z_tsne")
        _dump_pickle(learning_rates, "learning_rates")
        _dump_pickle(l_times, "l_times")
        _dump_pickle(l_kl_divergence, "l_kl_divergence")
    else:
        l_Z = _load_pickle("l_Z_tsne")
        learning_rates = _load_pickle("learning_rates")
        l_times = _load_pickle("l_times")
        l_kl_divergence = _load_pickle("l_kl_divergence")

    # The reference 2D data is always read from the folder.
    X_2d_tsne = _load_pickle("X_2d")
    l_differences = HL.get_differences(X_2d_tsne, l_Z)
    return l_Z, learning_rates, l_times, l_kl_divergence, l_differences
# Example #2
0
def early_exaggeration(folder,
                       modification='',
                       create=False,
                       early_exaggeration=np.arange(1, 80, 1)):
    """
    Generate or load t-SNE transformations for a range of early exaggeration
    values.

    Parameters
    -------------
    folder: The name of the folder the pickles should be put in / are in
    modification: the modification done to the dataset, a string used in the
        name of the corresponding pickles.
    create: true if you want to create the transformations (they are then
        always pickled into folder), false if you want to load them
    early_exaggeration: the early exaggeration values you want to create
        transformations of (ignored and replaced by the pickled values when
        create is false)

    Output
    -------------
    e_Z: A list of the t-SNE transformations
    early_exaggeration: a vector with the corresponding values of
        early_exaggeration
    e_times: a vector with the corresponding values of computational time
        (seconds spent in fit_transform)
    e_kl_divergence: a vector with the corresponding values kl divergence
    e_differences: the result of HL.get_differences(X_2d, e_Z) (described by
        the original author as the difference in 2d distance)
    """

    def _pickle_path(name):
        # All pickles share the "<folder>/<name><modification>.pkl" layout.
        return folder + "/" + name + modification + ".pkl"

    def _load_pickle(name):
        # NOTE: unpickling can execute arbitrary code; only point `folder`
        # at data you trust.
        with open(_pickle_path(name), "rb") as handle:
            return pickle.load(handle)

    def _dump_pickle(obj, name):
        # The context manager guarantees the file is flushed and closed.
        with open(_pickle_path(name), "wb") as handle:
            pickle.dump(obj, handle)

    if create:
        X = _load_pickle("X")
        e_Z = []
        e_times = np.zeros(len(early_exaggeration))
        e_kl_divergence = np.zeros(len(early_exaggeration))
        for i, exaggeration in enumerate(early_exaggeration):
            # Fixed random_state so runs differ only by early exaggeration.
            tsne = t_sne(early_exaggeration=exaggeration, random_state=123)
            start_time = time.time()
            e_Z.append(tsne.fit_transform(X))
            e_times[i] = time.time() - start_time
            e_kl_divergence[i] = tsne.kl_divergence_
        _dump_pickle(e_Z, "e_Z_tsne")
        _dump_pickle(early_exaggeration, "early_exaggeration")
        _dump_pickle(e_times, "e_times")
        _dump_pickle(e_kl_divergence, "e_kl_divergence")
    else:
        e_Z = _load_pickle("e_Z_tsne")
        early_exaggeration = _load_pickle("early_exaggeration")
        e_times = _load_pickle("e_times")
        e_kl_divergence = _load_pickle("e_kl_divergence")

    # The reference 2D data is always read from the folder.
    X_2d_tsne = _load_pickle("X_2d")
    e_differences = HL.get_differences(X_2d_tsne, e_Z)
    return e_Z, early_exaggeration, e_times, e_kl_divergence, e_differences
# Example #3
0
def threshold(folder,
              modification='',
              create=False,
              threshold=np.logspace(-14, -1, 50)):
    """
    Generate or load t-SNE transformations for a range of thresholds
    (passed to the estimator as min_grad_norm).

    Parameters
    -------------
    folder: The name of the folder the pickles should be put in / are in
    modification: the modification done to the dataset, a string used in the
        name of the corresponding pickles.
    create: true if you want to create the transformations (they are then
        always pickled into folder), false if you want to load them
    threshold: the thresholds you want to create transformations of (ignored
        and replaced by the pickled values when create is false)

    Output
    -------------
    t_Z: A list of the t-SNE transformations
    threshold: a vector with the corresponding values of the threshold
    t_times: a vector with the corresponding values of computational time
        (seconds spent in fit_transform)
    t_kl_divergence: a vector with the corresponding values kl divergence
    t_differences: the result of HL.get_differences(X_2d, t_Z) (described by
        the original author as the difference in 2d distance)
    """

    def _pickle_path(name):
        # All pickles share the "<folder>/<name><modification>.pkl" layout.
        return folder + "/" + name + modification + ".pkl"

    def _load_pickle(name):
        # NOTE: unpickling can execute arbitrary code; only point `folder`
        # at data you trust.
        with open(_pickle_path(name), "rb") as handle:
            return pickle.load(handle)

    def _dump_pickle(obj, name):
        # The context manager guarantees the file is flushed and closed.
        with open(_pickle_path(name), "wb") as handle:
            pickle.dump(obj, handle)

    if create:
        X = _load_pickle("X")
        t_Z = []
        t_times = np.zeros(len(threshold))
        t_kl_divergence = np.zeros(len(threshold))
        for i, tolerance in enumerate(threshold):
            # Fixed random_state so runs differ only by the threshold.
            tsne = t_sne(min_grad_norm=tolerance, random_state=123)
            start_time = time.time()
            t_Z.append(tsne.fit_transform(X))
            t_times[i] = time.time() - start_time
            t_kl_divergence[i] = tsne.kl_divergence_
        _dump_pickle(t_Z, "t_Z_tsne")
        _dump_pickle(threshold, "threshold")
        _dump_pickle(t_times, "t_times")
        _dump_pickle(t_kl_divergence, "t_kl_divergence")
    else:
        t_Z = _load_pickle("t_Z_tsne")
        threshold = _load_pickle("threshold")
        t_times = _load_pickle("t_times")
        t_kl_divergence = _load_pickle("t_kl_divergence")

    # The reference 2D data is always read from the folder.
    X_2d_tsne = _load_pickle("X_2d")
    t_differences = HL.get_differences(X_2d_tsne, t_Z)
    return t_Z, threshold, t_times, t_kl_divergence, t_differences
# Example #4
0
def n_reg(folder=None,
          modification='',
          create=False,
          reg=np.logspace(-14, 10, 50),
          X=None,
          pkl=True,
          X_2d_lle=None):
    """
    Generate or load LLE transformations for a range of regularization terms.

    Parameters
    -------------
    folder: The name of the folder the pickles should be put in / are in.
        May only be omitted when create is true, pkl is false and both X and
        X_2d_lle are supplied.
    modification: the modification done to the dataset, a string used in the
        name of the corresponding pickles.
    create: true if you want to create the transformations, false if you want
        to load them
    reg: the values of the regularization term you want to create
        transformations of (replaced by the pickled values when create is
        false)
    X: the dataset to transform; loaded from folder when None (only used when
        create is true)
    pkl: true if the created results should be pickled into folder, otherwise
        false (only used when create is true)
    X_2d_lle: the 2d data corresponding to X; loaded from folder when None

    Output
    -------------
    r_Y: A list of the LLE transformations
    reg: a vector with the corresponding values of the regularization term
    r_times: a vector with the corresponding values of computational time
        (seconds spent in fit_transform)
    r_reconstruction_error: a vector with the corresponding values of
        reconstruction error
    r_differences: the result of HL.get_differences(X_2d_lle, r_Y) (described
        by the original author as the difference in 2d distance)

    Raises
    -------------
    TypeError: if folder is None although a pickle has to be read or written
    """
    # Fail before the expensive sweep instead of after it: previously a
    # missing folder only surfaced as "None + str" once a path was built.
    if folder is None and (not create or pkl or X is None
                           or X_2d_lle is None):
        raise TypeError(
            "folder must be given unless create=True, pkl=False and both X "
            "and X_2d_lle are supplied")

    def _pickle_path(name):
        # All pickles share the "<folder>/<name><modification>.pkl" layout.
        return folder + "/" + name + modification + ".pkl"

    def _load_pickle(name):
        # NOTE: unpickling can execute arbitrary code; only point `folder`
        # at data you trust.
        with open(_pickle_path(name), "rb") as handle:
            return pickle.load(handle)

    def _dump_pickle(obj, name):
        # The context manager guarantees the file is flushed and closed.
        with open(_pickle_path(name), "wb") as handle:
            pickle.dump(obj, handle)

    if create:
        if X is None:
            X = _load_pickle("X")
        n_components = 2
        neighbors = 12
        r_Y = []
        r_times = np.zeros(len(reg))
        r_reconstruction_error = np.zeros(len(reg))
        for i, reg_value in enumerate(reg):
            # NOTE(review): if `lle` is scikit-learn's LocallyLinearEmbedding,
            # recent releases require keyword arguments here - confirm.
            LLE = lle(neighbors,
                      n_components,
                      reg=reg_value,
                      eigen_solver='auto')
            start_time = time.time()
            r_Y.append(LLE.fit_transform(X))
            r_times[i] = time.time() - start_time
            r_reconstruction_error[i] = LLE.reconstruction_error_
        if pkl:
            _dump_pickle(r_Y, "r_Y_lle")
            _dump_pickle(reg, "reg")
            _dump_pickle(r_times, "r_times")
            _dump_pickle(r_reconstruction_error, "r_reconstruction_error")
    else:
        r_Y = _load_pickle("r_Y_lle")
        reg = _load_pickle("reg")
        r_times = _load_pickle("r_times")
        r_reconstruction_error = _load_pickle("r_reconstruction_error")

    if X_2d_lle is None:
        X_2d_lle = _load_pickle("X_2d")
    r_differences = HL.get_differences(X_2d_lle, r_Y)
    return r_Y, reg, r_times, r_reconstruction_error, r_differences
# Example #5
0
def n_neighbors(folder=None,
                modification='',
                create=False,
                n_neighbors=np.arange(3, 60, 1),
                X=None,
                pkl=True,
                X_2d_lle=None):
    """
    Generate or load LLE transformations for a range of neighbour counts.

    Parameters
    -------------
    folder: The name of the folder the pickles should be put in / are in.
        May only be omitted when create is true, pkl is false and both X and
        X_2d_lle are supplied.
    modification: the modification done to the dataset, a string used in the
        name of the corresponding pickles.
    create: true if you want to create the transformations, false if you want
        to load them
    n_neighbors: the numbers of neighbours you want to create transformations
        of (replaced by the pickled values when create is false)
    X: the dataset to transform; loaded from folder when None (only used when
        create is true)
    pkl: true if the created results should be pickled into folder, otherwise
        false (only used when create is true)
    X_2d_lle: the 2d data corresponding to X; loaded from folder when None

    Output
    -------------
    n_Y: A list of the LLE transformations
    n_neighbors: a vector with the corresponding values of number of
        neighbours
    n_times: a vector with the corresponding values of computational time
        (seconds spent in fit_transform)
    n_reconstruction_error: a vector with the corresponding values of
        reconstruction error
    n_differences: the result of HL.get_differences(X_2d_lle, n_Y) (described
        by the original author as the difference in 2d distance)

    Raises
    -------------
    TypeError: if folder is None although a pickle has to be read or written
    """
    # Fail before the expensive sweep instead of after it: previously a
    # missing folder only surfaced as "None + str" once a path was built.
    if folder is None and (not create or pkl or X is None
                           or X_2d_lle is None):
        raise TypeError(
            "folder must be given unless create=True, pkl=False and both X "
            "and X_2d_lle are supplied")

    def _pickle_path(name):
        # All pickles share the "<folder>/<name><modification>.pkl" layout.
        return folder + "/" + name + modification + ".pkl"

    def _load_pickle(name):
        # NOTE: unpickling can execute arbitrary code; only point `folder`
        # at data you trust.
        with open(_pickle_path(name), "rb") as handle:
            return pickle.load(handle)

    def _dump_pickle(obj, name):
        # The context manager guarantees the file is flushed and closed.
        with open(_pickle_path(name), "wb") as handle:
            pickle.dump(obj, handle)

    if create:
        n_components = 2
        if X is None:
            X = _load_pickle("X")
        n_Y = []
        n_times = np.zeros(len(n_neighbors))
        n_reconstruction_error = np.zeros(len(n_neighbors))
        for i, neighbor_count in enumerate(n_neighbors):
            # NOTE(review): if `lle` is scikit-learn's LocallyLinearEmbedding,
            # recent releases require keyword arguments here - confirm.
            LLE = lle(neighbor_count, n_components, eigen_solver='auto')
            start_time = time.time()
            n_Y.append(LLE.fit_transform(X))
            n_times[i] = time.time() - start_time
            n_reconstruction_error[i] = LLE.reconstruction_error_
        if pkl:
            _dump_pickle(n_Y, "n_Y_lle")
            _dump_pickle(n_neighbors, "n_neighbors")
            _dump_pickle(n_times, "n_times")
            _dump_pickle(n_reconstruction_error, "n_reconstruction_error")
    else:
        n_Y = _load_pickle("n_Y_lle")
        n_neighbors = _load_pickle("n_neighbors")
        n_times = _load_pickle("n_times")
        n_reconstruction_error = _load_pickle("n_reconstruction_error")

    if X_2d_lle is None:
        X_2d_lle = _load_pickle("X_2d")
    n_differences = HL.get_differences(X_2d_lle, n_Y)
    return n_Y, n_neighbors, n_times, n_reconstruction_error, n_differences
# Example #6
0
def perplexity(folder=None,
               modification='',
               per=np.arange(2, 150, 2),
               create=False,
               pkl=True,
               X=None,
               X_2d_tsne=None):
    """
    Generate or load t-SNE transformations for a range of perplexities.

    Parameters
    -------------
    folder: The name of the folder the pickles should be put in / are in.
        May only be omitted when create is true, pkl is false and both X and
        X_2d_tsne are supplied.
    modification: the modification done to the dataset, a string used in the
        name of the corresponding pickles.
    per: the perplexities you want to create transformations of (replaced by
        the pickled values when create is false)
    create: true if you want to create the transformations, false if you want
        to load them
    pkl: true if the created results should be pickled into folder, otherwise
        false (only used when create is true)
    X: the data you want to transform; loaded from folder when None (only
        used when create is true)
    X_2d_tsne: the original 2D version of X; loaded from folder when None

    Output
    -------------
    p_Z: A list of the t-SNE transformations
    per: a vector with the corresponding values of perplexity
    p_times: a vector with the corresponding values of computational time
        (seconds spent in fit_transform)
    p_kl_divergence: a vector with the corresponding values kl divergence
    p_differences: the result of HL.get_differences(X_2d_tsne, p_Z)
        (described by the original author as the difference in 2d distance)

    Raises
    -------------
    TypeError: if folder is None although a pickle has to be read or written
    """
    # Fail before the expensive sweep instead of after it: previously a
    # missing folder only surfaced as "None + str" once a path was built.
    if folder is None and (not create or pkl or X is None
                           or X_2d_tsne is None):
        raise TypeError(
            "folder must be given unless create=True, pkl=False and both X "
            "and X_2d_tsne are supplied")

    def _pickle_path(name):
        # All pickles share the "<folder>/<name><modification>.pkl" layout.
        return folder + "/" + name + modification + ".pkl"

    def _load_pickle(name):
        # NOTE: unpickling can execute arbitrary code; only point `folder`
        # at data you trust.
        with open(_pickle_path(name), "rb") as handle:
            return pickle.load(handle)

    def _dump_pickle(obj, name):
        # The context manager guarantees the file is flushed and closed.
        with open(_pickle_path(name), "wb") as handle:
            pickle.dump(obj, handle)

    if create:
        p_Z = []
        p_times = np.zeros(len(per))
        p_kl_divergence = np.zeros(len(per))
        if X is None:
            X = _load_pickle("X")
        for i, perplexity_value in enumerate(per):
            # Fixed random_state so runs differ only by the perplexity.
            tsne = t_sne(perplexity=perplexity_value, random_state=123)
            start_time = time.time()
            p_Z.append(tsne.fit_transform(X))
            p_times[i] = time.time() - start_time
            p_kl_divergence[i] = tsne.kl_divergence_
        if pkl:
            _dump_pickle(p_Z, "p_Z_tsne")
            _dump_pickle(per, "per")
            _dump_pickle(p_times, "p_times")
            _dump_pickle(p_kl_divergence, "p_kl_divergence")
    else:
        p_Z = _load_pickle("p_Z_tsne")
        per = _load_pickle("per")
        p_times = _load_pickle("p_times")
        p_kl_divergence = _load_pickle("p_kl_divergence")

    if X_2d_tsne is None:
        X_2d_tsne = _load_pickle("X_2d")
    p_differences = HL.get_differences(X_2d_tsne, p_Z)
    return p_Z, per, p_times, p_kl_divergence, p_differences