Example #1
 def getfitness(self):
     from SVD import SVD
     svd = SVD(size)  # `size` comes from the enclosing scope
     f_tikhonov = svd.f_tikhonov(self.lam, self.AM, self.b.T)
     # Fitness is the coefficient of variation of the Tikhonov solution.
     return np.std(f_tikhonov, dtype=np.float64) / np.mean(f_tikhonov)
Example #2
 def getfitness(self):
     from SVD import SVD
     svd = SVD(size)  # `size` comes from the enclosing scope
     f_tikhonov = svd.f_tikhonov(self.lam, self.AM, self.b.T)
     # Fitness is the coefficient of variation of the Tikhonov solution.
     return np.std(f_tikhonov, dtype=np.float64) / np.mean(f_tikhonov)
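The `SVD` class in these two snippets is project-specific. For reference, a minimal NumPy sketch of the standard Tikhonov filter that `f_tikhonov(lam, AM, b)` presumably implements (the exact filter convention, `lam` vs `lam**2`, is an assumption):

import numpy as np

def tikhonov_solve(A, b, lam):
    """Tikhonov-regularized solution of A f = b via the SVD of A."""
    U, s, Vt = np.linalg.svd(A, full_matrices=False)
    filt = s / (s**2 + lam**2)          # Tikhonov filter factors
    return Vt.T @ (filt * (U.T @ b))

# Tiny usage example on a random square system.
rng = np.random.default_rng(0)
A = rng.standard_normal((17, 17))
b = rng.standard_normal(17)
f = tikhonov_solve(A, b, lam=0.05)
print(np.std(f) / np.mean(f))           # the "fitness" used above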
Example #3
def Compute_U(train, C_frob, R_frob):
    # Intersection matrix W: the rows/columns selected by the CUR sampling.
    W_matrix = [[train[int(i[0]), int(j[0])] for j in C_frob] for i in R_frob]

    X, Y, Sig1 = SVD(np.array(W_matrix), percent_energy_retain=100)

    Sig = np.diag(Sig1)  # singular values as a 1-d array

    Sigma_sum = np.sum(Sig)
    print(type(Sigma_sum))
    print(Sigma_sum)
    Sigma_sum *= 0.9999  # retain 99.99% of the spectral energy
    x = 0
    t = 0
    for i in range(len(Sig)):
        t = i
        if x > Sigma_sum:
            break
        else:
            x += Sig[i]

    print("Shape of X ::  Shape of Y", X.shape, " :: ", Y.shape)

    # Delete t trailing singular components (and the matching columns of X
    # and rows of Y).
    for i in range(t):
        X = np.delete(X, len(Sig) - 1, 1)
        Y = np.delete(Y, len(Sig) - 1, 0)
        Sig = np.delete(Sig, len(Sig) - 1)

    print("New Sigma Shape :: ", Sig.shape)
    print("New Sigma length :: ", len(Sig))
    q = len(Sig)

    print("Shape of X ::  Shape of Y", X.shape, " :: ", Y.shape)

    Pseudo_inv = Y.transpose()
    ''' Translating Sigma (1-d) to Sigma (diagonal matrix) '''

    print("New sigma :: ", Sig.shape)
    Sig_inv = np.diagflat(Sig)

    print("E:: ", Sig_inv[q - 1, q - 1])
    Sig_inv = np.linalg.inv(Sig_inv)

    print("Sigma Inverse :: ", Sig_inv.shape)
    print("N:: ", Sig_inv[q - 1, q - 1])

    # Square the inverse: U = Y (Sigma^+)^2 X^T.
    Sig_inv = np.matmul(Sig_inv, Sig_inv.T)

    print("Sigma Shape :: ", Sig_inv.shape)
    Pseudo_inv = np.matmul(Pseudo_inv, Sig_inv)
    Pseudo_inv = np.matmul(Pseudo_inv, X.transpose())
    U = Pseudo_inv
    return U
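`Compute_U` follows the CUR recipe U = Y (Σ⁺)² Xᵀ for the intersection matrix W. A compact NumPy sketch of the same construction, assuming the project's SVD helper returns the right factor already transposed (W = X · diag(σ) · Y), which matches NumPy's convention:

import numpy as np

def cur_middle_matrix(W):
    # U = Y (Sigma^+)^2 X^T, with a guard for tiny singular values.
    X, s, Yt = np.linalg.svd(np.asarray(W, dtype=float), full_matrices=False)
    s_inv = np.where(s > 1e-12, 1.0 / s, 0.0)
    return Yt.T @ np.diag(s_inv ** 2) @ X.T

W = np.arange(9.0).reshape(3, 3) + np.eye(3)
print(cur_middle_matrix(W))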
Example #4
 def train(self, maxK, maxLambda, sampleNumber=200):
     """
     :param maxK: must larger than 1
     :param maxLambda: must larger than 0
     :param sampleNumber : sample number in one sampling
     :return:
     """
     print("Bayesian training start.")
     self.X.append([self.k, self.lambdaR])
     self.y.append(self.estimator_Calculate())
     self.model = self.model.fit(X=np.array(self.X), y=np.array(self.y))
     for t in range(self.trainingTimes):
         print("#################")
         print("It is at " + str(t) + " training.")
         Xsamples = [[
             self.randomSearchK(1, maxK),
             self.randomSearchLambda(0., maxLambda)
         ] for _ in range(sampleNumber)]
         #print(Xsamples)
         p = np.random.rand(1)
         print("P value : ", p)
         if p <= self.p:
             nextX = self.maxAcquisition(Xsamples)
         else:
             index = int(np.random.rand(1) * len(Xsamples))
             nextX = Xsamples[index]
         self.k = nextX[0]
         print("The next k value is : ", self.k)
         self.lambdaR = nextX[1]
         print("The next lambda value is : ", self.lambdaR)
         self.svd = SVD(self.matrix,
                        int(self.k),
                        biasSVD=self.biasSVD,
                        prediction=True,
                        regularization=self.lambdaR,
                        trainingTimes=self.maxSVDTrainingTimes,
                        device=self.device,
                        learning_rate=self.lr)
         actual = self.estimator_Calculate()
         self.X.append([self.k, self.lambdaR])
         self.y.append(actual)
         print(np.array(self.X))
         print(np.array(self.y))
         self.model = self.model.fit(np.array(self.X), y=np.array(self.y))
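`maxAcquisition` (defined on the same class; see Example #20 below) scores candidates by expected improvement under the GP surrogate. A self-contained sketch of that scoring step with scikit-learn and SciPy, on stand-in data:

import numpy as np
from scipy.stats import norm
from sklearn.gaussian_process import GaussianProcessRegressor

rng = np.random.default_rng(0)
X = rng.uniform(0, 1, (10, 2))                  # observed (k, lambda) pairs
y = np.sin(X[:, 0] * 6) + X[:, 1]               # stand-in estimator values
model = GaussianProcessRegressor().fit(X, y)

Xnew = rng.uniform(0, 1, (200, 2))              # candidate samples
mu, std = model.predict(Xnew, return_std=True)
best = y.max()
ei = (mu - best) * norm.cdf((mu - best) / (std + 1e-9)) \
     + std * norm.pdf((mu - best) / (std + 1e-9))
print(Xnew[np.argmax(ei)])                      # next (k, lambda) to try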
Example #5
    def dimensionality_reduction(self):
        # self.set_database_matrix()
        # Note: with <=20 images or <=20 features the reduction models fail,
        # because n_components must be <= min(n, m) of the database_matrix.
        # Hence we take min(min(len(self.database_matrix[0]), len(self.database_matrix)), 20).
        if self.decomposition_name == 'PCA':
            self.decomposition_model = PCAModel(self.database_matrix, self.k_components, self.database_image_id)
        elif self.decomposition_name == 'SVD':
            self.decomposition_model = SVD(self.database_matrix, self.k_components, self.database_image_id)
        elif self.decomposition_name == 'NMF':
            self.decomposition_model = NMFModel(self.database_matrix, self.k_components, self.database_image_id)
        elif self.decomposition_name == 'LDA':
            self.decomposition_model = LDAModel(self.database_matrix, self.k_components, self.database_image_id)

        self.decomposition_model.decompose()
        print('Decomposition Complete')
        decomposed_database_matrix = self.decomposition_model.get_decomposed_data_matrix()
        reduced_dimension_folder_images_dict = {}
        for image_id, reduced_feature_vector in zip(self.database_image_id, decomposed_database_matrix):
            reduced_dimension_folder_images_dict[image_id] = reduced_feature_vector
        if self.metadata_label != '':
            misc.save2pickle(reduced_dimension_folder_images_dict, self.reduced_pickle_file_folder,
                             feature=(self.feature_extraction_model_name+'_'+self.decomposition_name+
                                      '_' + self.metadata_label))
        else:
            misc.save2pickle(reduced_dimension_folder_images_dict, self.reduced_pickle_file_folder,
                             feature=(self.feature_extraction_model_name + '_' + self.decomposition_name))
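The comment above caps `k_components` at min(number of images, number of features, 20). A runnable sketch of the same constraint, using scikit-learn's `TruncatedSVD` as a stand-in for the project's `SVD` model:

import numpy as np
from sklearn.decomposition import TruncatedSVD

database_matrix = np.random.rand(15, 30)     # 15 images x 30 features
k = min(min(database_matrix.shape) - 1, 20)  # n_components must stay below min(n, m)
svd = TruncatedSVD(n_components=k)
reduced = svd.fit_transform(database_matrix)
print(reduced.shape)                         # (15, 14)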
Example #6
    def __init__(self,
                 matrix,
                 iniK,
                 iniLambda,
                 device=torch.device("cpu"),
                 biasSVD=True,
                 lr=1e-3,
                 pValue=0.75,
                 maxSVDTrainingTimes=50000,
                 maxBayesianTrainingTimes=150):

        ### Zero entries are the positions the model will predict, so add a
        ### small constant to keep genuinely observed entries nonzero.
        self.matrix = np.array(matrix, dtype=np.float32) + 1e-4
        self.m = self.matrix.shape[0]
        self.n = self.matrix.shape[1]
        self.lr = lr
        self.p = pValue
        rand = np.random.rand(self.m, self.n)
        ### 80% as training set and 20% as testing set.
        self.oneMask = np.array(rand >= 0.2, dtype=np.float32)
        self.matrix = self.matrix * self.oneMask
        self.gp_kernel = gp.kernels.RBF() + gp.kernels.WhiteKernel(1e-1)
        self.model = gp.GaussianProcessRegressor(kernel=self.gp_kernel)
        self.k = iniK
        self.lambdaR = iniLambda
        self.maxSVDTrainingTimes = maxSVDTrainingTimes
        self.biasSVD = biasSVD
        self.svd = SVD(self.matrix,
                       self.k,
                       biasSVD=biasSVD,
                       prediction=True,
                       regularization=self.lambdaR,
                       trainingTimes=maxSVDTrainingTimes,
                       device=device,
                       learning_rate=lr)
        self.device = device
        self.trainingTimes = maxBayesianTrainingTimes
        self.X = []
        self.y = []
        self.est = []
        self.predict = []
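A tiny sketch of the masking trick in this constructor: the small shift keeps observed entries nonzero, and entries zeroed by the random mask become the held-out set that `estimator_Calculate` later scores:

import numpy as np

rng = np.random.default_rng(0)
matrix = rng.random((4, 5)) + 1e-4              # shift keeps entries nonzero
one_mask = (rng.random((4, 5)) >= 0.2).astype(np.float32)
train = matrix * one_mask                       # ~80% kept for training
test_mask = (train == 0).astype(np.float32)     # ~20% held out
print(train, test_mask, sep="\n")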
Example #7
 def __init__(self, modelInstance):
     self.model = modelInstance
     features = [
         cosine_similarity.CosineSimilarity(),
         n_gram_matching.NGramMatching(),
         sentiment_feature.SentimentFeature(),
         SVD.SVD(),
         TFIDF.TFIDF(),
         baseline_features.BaselineFeature(),
         cue_words.CueWords()
     ]
     self.features_train = np.hstack(
         [feature.read() for feature in features])
     self.labels_train = DataSet(path="../FNC-1").get_labels()
     self.features_test = np.hstack(
         [feature.read('competition_test') for feature in features])
     self.labels_test = DataSet(path="../FNC-1",
                                name="competition_test").get_labels()
Example #8
        rawcount = 0
        sumAM = 0.0
        for rawobject in rawAMarray:
            if rawcount > 135 * k and rawcount < 135 * (k + 1):
                sumAM = sumAM + rawobject
            rawcount = rawcount + 1
        sumAM = sumAM / 135.0
        AMarray = np.append(AMarray, sumAM)
    for j in range(0, n):
        #AMarray = AMarray[::-1]
        AM[i, j] = AMarray[j] / n
print(AM)
#noise = np.random.normal(0,1,(n,n))
#AM = AM +noise/1000
#TSVD method
svdclass = SVD(n)
g = iqewl
f_tsvd = svdclass.f_tsvd(3, AM, g.T)
f_tikhonov = svdclass.f_tikhonov(0.05, AM, g.T)
utb, utbs = svdclass.picardparameter(AM, g.T)
U, s, V = svdclass.svdmatrix(AM)

from pylab import *
x = np.arange(0, 17)

ax1 = subplot(111)
#ax1.set_yscale('log')
#ax.set_xscale('log')
ax1.scatter(x / 17.0, f_tsvd, marker='o', label='TSVD Regularization', color='black')
#ax1.scatter(x, s, marker='o', label=r"${\sigma _i}$", color='black')
#ax1.scatter(x, abs(utb), marker='o', label='$ {u_i^TP}$', color='red')
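`f_tsvd(k, AM, g.T)` is project code; a minimal NumPy sketch of the truncated-SVD solution it presumably computes, f_k = Σ_{i<k} (u_iᵀ g)/σ_i · v_i:

import numpy as np

def tsvd_solve(A, g, k):
    U, s, Vt = np.linalg.svd(A)
    coeffs = (U.T @ g)[:k] / s[:k]       # keep only the k largest modes
    return Vt[:k].T @ coeffs

rng = np.random.default_rng(0)
A = rng.standard_normal((17, 17))
g = rng.standard_normal(17)
print(tsvd_solve(A, g, 3))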
Example #9
def CUR_decomposition_with_replacement(selected_Columns, selected_Rows):
    print("List of Selected Columns")
    print(selected_Columns)

    sel_frob_c = [forbenius_norm_matrix_col[i] for i in selected_Columns]

    sel_frob_r = [forbenius_norm_matrix_row[i] for i in selected_Rows]

    R_frob = np.column_stack((selected_Rows, sel_frob_r))
    C_frob = np.column_stack((selected_Columns, sel_frob_c))

    print("No of Columns Considered : ", len(C_frob))

    print("Matrix_C of CUR ::")
    try_C = train[:, selected_Columns]
    print(try_C.shape)
    Matrix_C = [[((train[i, y[0]]) / (sqrt(no_of_param * y[1])))
                 for y in C_frob]
                for i in range(len(forbenius_norm_matrix_row))]
    mat_c = np.array(Matrix_C)

    print(len(Matrix_C), " , ", len(Matrix_C[0]))

    R_frob = R_frob[:no_of_param]

    print("No of Rows Considered : ", len(R_frob))

    print("Matrix_R of CUR")
    try_R = train[selected_Rows, :]
    print("Try_r")
    print(try_R.shape)
    Matrix_R = [[(train[int(y[0]), i]) / (sqrt(no_of_param * y[1]))
                 for i in range(len(forbenius_norm_matrix_col))]
                for y in R_frob]
    print(len(Matrix_R), " , ", len(Matrix_R[0]))

    print("Matrix_W of CUR")
    W_matrix = [[train[int(i[0]), int(j[0])] for j in C_frob] for i in R_frob]

    print("Calculating the SVD of W matrix")

    X, Y, Sig1 = SVD(W_matrix)

    Sig = np.diag(Sig1)
    print("Shape of W matrix svd matrix")
    print(X.shape, " ", Sig.shape, " ", Y.shape)

    Pseudo_inv = Y.transpose()
    Sig_inv = np.diagflat(Sig)

    Sig_inv = np.linalg.inv(Sig_inv)

    # Square the inverse: U = Y (Sigma^+)^2 X^T.
    Sig_inv = np.matmul(Sig_inv, Sig_inv)

    Pseudo_inv = np.matmul(Pseudo_inv, Sig_inv)
    Pseudo_inv = np.matmul(Pseudo_inv, X.transpose())

    mat_r = np.array(Matrix_R)  # mat_c was already computed above
    print(mat_c.shape, " ", Pseudo_inv.shape, " ", mat_r.shape)
    Cur_mat = np.matmul(mat_c, Pseudo_inv)
    Cur_mat = np.matmul(Cur_mat, mat_r)

    print("Final Matrix Shape with replacement as true")

    print("Final Matrix Shape")
    print(Cur_mat.shape)

    print(Cur_mat[0, 0])

    Cur_mat = np.add(Cur_mat, means_mat)

    evaluation.rmse(Cur_mat)
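An end-to-end sanity check of the C·U·R product on a small dense matrix. This sketch uses uniform sampling and the plain pseudoinverse W⁺ in place of the Frobenius-weighted sampling and squared-inverse U above, so it illustrates the shape bookkeeping, not the exact scheme:

import numpy as np

rng = np.random.default_rng(0)
train = rng.random((20, 20))
cols = rng.choice(20, size=12, replace=False)
rows = rng.choice(20, size=12, replace=False)
C = train[:, cols]                      # 20 x 12
R = train[rows, :]                      # 12 x 20
W = train[np.ix_(rows, cols)]           # 12 x 12 intersection
U = np.linalg.pinv(W)
approx = C @ U @ R
print(np.linalg.norm(train - approx) / np.linalg.norm(train))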
Example #10
        rawcount = 0
        sumAM = 0.0
        for rawobject in rawAMarray:
            if rawcount > 617 * k and rawcount < 617 * (k + 1):
                sumAM = sumAM + rawobject
            rawcount = rawcount + 1
        sumAM = sumAM / 617.0
        AMarray = np.append(AMarray, sumAM)
    for j in range(0, n):
        #AMarray = AMarray[::-1]
        AM[i, j] = AMarray[j] / n
print('readingAM')
noise = np.random.normal(0, 1, (n, n))
#AM = AM +noise/100
#TSVD method
svdclass = SVD(n)
g = iqedataarray / 100.0
f_tsvd = svdclass.f_tsvd(4, AM, g.T)
f_tikhonov = svdclass.f_tikhonov(0.05, AM, g.T)
utb, utbs = svdclass.picardparameter(AM, g.T)


from pylab import *
x = np.arange(0, 81)

ax1 = subplot(111)
#ax1.set_yscale('log')
#ax.set_xscale('log')
reconstructioniqe = np.dot(AM, f_tsvd)
ax1.scatter(x / 81.0, f_tsvd, marker='o', label='reconstruction', color='black')
#ax1.plot(x/81.0, iqedataarray/100.0, label='measured', color='red')
Example #11
    QR('/Users/christopher/Documents/anaII/rte_matrix/mat/kelp1_10x10x16_012.mat'),
    QR('/Users/christopher/Documents/anaII/rte_matrix/mat/kelp1_20x20x12_012.mat'),
    QR('/Users/christopher/Documents/anaII/rte_matrix/mat/kelp1_20x20x24_012.mat'),
    QR('/Users/christopher/Documents/anaII/rte_matrix/mat/kelp1_20x20x32_012.mat')
]  #, QR('/Users/christopher/Documents/anaII/rte_matrix/mat/kelp1_50x50x16_012.mat'), QR('/Users/christopher/Documents/anaII/rte_matrix/mat/kelp1_50x50x24_012.mat'), QR('/Users/christopher/Documents/anaII/rte_matrix/mat/kelp1_50x50x32_012.mat')]

QR_times = [
    QR_answers[0][1], QR_answers[1][1], QR_answers[2][1], QR_answers[3][1]
]  #, QR_answers[4][1]], QR_answers[5][1], QR_answers[6][1]]

SVD_answers = [
    SVD('/Users/christopher/Documents/anaII/rte_matrix/mat/kelp1_10x10x16_012.mat'),
    SVD('/Users/christopher/Documents/anaII/rte_matrix/mat/kelp1_20x20x12_012.mat'),
    SVD('/Users/christopher/Documents/anaII/rte_matrix/mat/kelp1_20x20x24_012.mat'),
    SVD('/Users/christopher/Documents/anaII/rte_matrix/mat/kelp1_20x20x32_012.mat')
]  #, SVD('/Users/christopher/Documents/anaII/rte_matrix/mat/kelp1_50x50x16_012.mat'), SVD('/Users/christopher/Documents/anaII/rte_matrix/mat/kelp1_50x50x24_012.mat'), SVD('/Users/christopher/Documents/anaII/rte_matrix/mat/kelp1_50x50x32_012.mat')]

SVD_times = [
    SVD_answers[0][1], SVD_answers[1][1], SVD_answers[2][1], SVD_answers[3][1]
]  #, SVD_answers[4][1]], SVD_answers[5][1], SVD_answers[6][1]]

matrix_sizes = [
    len(QR_answers[0][0]),
    len(QR_answers[1][0]),

Example #12
        relevant_projects, known_user_likes_test = get_precision_and_recall_by_time_split(
            user, 10, algorithm, None)
        precision_recall_by_k = precision_recall(user, known_user_likes_test,
                                                 relevant_projects, algorithm,
                                                 k_values)
        results.append(precision_recall_by_k)  # ip_addresses[i%4]))
        i += 1
    for k in range(len(k_values)):
        print("algorithm:" + str(algorithm) + " k " + str(k))
        print(str([i[k][0] for i in results if i[k][0] >= 0]) + "\n")
        print(str([i[k][1] for i in results if i[k][1] >= 0]) + "\n")
        precisions = np.mean([i[k][0] for i in results if i[k][0] >= 0])
        recalls = np.mean([i[k][1] for i in results if i[k][1] >= 0])
        special_precisions = np.mean(
            [i[k][2] for i in results if i[k][2] >= 0])
        print("algorithm:", algorithm, "k", k)
        print(precisions, recalls, special_precisions)


if __name__ == '__main__':
    print(Recommender.data_items.shape)
    print(data_items_train.shape)
    # for ip change get_recommendation param
    # precision_recall_at_k([1, 5, 7, 10], Recommender.data['user'].values, CFUserUser(data_items_train))
    precision_recall_at_k([1, 3, 5, 7, 10], Recommender.data['user'].values,
                          CFItemItem(data_items_train))
    precision_recall_at_k([1, 3, 5, 7, 10], Recommender.data['user'].values,
                          PopularityBased(data_items_train))
    precision_recall_at_k([1, 3, 5, 7, 10], Recommender.data['user'].values,
                          SVD(data_items_train))
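For reference, a minimal sketch of precision/recall at k for a single user, matching how the per-user results above are filtered (negative entries mark skipped users) and averaged; the function and sample data are illustrative:

def precision_recall_single(recommended, relevant, k):
    top_k = recommended[:k]
    hits = len(set(top_k) & set(relevant))
    precision = hits / k
    recall = hits / len(relevant) if relevant else 0.0
    return precision, recall

print(precision_recall_single([3, 7, 1, 9], [7, 9, 2], k=3))   # (0.333..., 0.333...)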
Example #13
    for k in range(0, n):
        rawcount = 0
        sumAM = 0.0
        for rawobject in rawAMarray:
            if rawcount > discreteno * k and rawcount < discreteno * (k + 1):
                sumAM = sumAM + rawobject
            rawcount = rawcount + 1
        sumAM = sumAM / discreteno
        AMarray = np.append(AMarray, sumAM)
    for j in range(0, n):
        #AMarray = AMarray[::-1]
        AM[i, j] = AMarray[j] / n
noise = np.random.normal(0, 1, (n, n))
#AM = AM +noise/100
#TSVD method
svdclass = SVD(n)
g = iqewl
#f_tsvd = svdclass.f_tsvd(40,AM,g.T)
#f_tikhonov =svdclass.f_tikhonov(0.05,AM,g.T)
utb, utbs = svdclass.picardparameter(AM, g.T)
U, s, V = svdclass.svdmatrix(AM)

from pylab import *
x = np.arange(0, n)

ax1 = subplot(111)
ax1.set_yscale('log')
#ax.set_xscale('log')
ax1.scatter(x, abs(utbs), marker='o', label='TSVD Regularization', color='black')

ax1.set_xlabel('Normalized position in CIGS layer', fontsize=15)
Example #14
    for k in range(0, n):
        rawcount = 0
        sumAM = 0.0
        for rawobject in rawAMarray:
            if rawcount > discreteno * k and rawcount < discreteno * (k + 1):
                sumAM = sumAM + rawobject
            rawcount = rawcount + 1
        sumAM = sumAM / discreteno
        AMarray = np.append(AMarray, sumAM)
    for j in range(0, n):
        #AMarray = AMarray[::-1]
        AM[i, j] = AMarray[j] / n
noise = np.random.normal(0, 1, (n, n))
#AM = AM +noise/100
#TSVD method
svdclass = SVD(n)
g = iqewl
#f_tsvd = svdclass.f_tsvd(40,AM,g.T)
#f_tikhonov =svdclass.f_tikhonov(0.05,AM,g.T)
utb, utbs = svdclass.picardparameter(AM, g.T)
U, s, V = svdclass.svdmatrix(AM)

from pylab import *
x = np.arange(0, n)

ax1 = subplot(111)
ax1.set_yscale('log')
#ax.set_xscale('log')
ax1.scatter(x,
            abs(utbs),
            marker='o',

Example #15
        results = []
        ip_addresses = ['']
        i = 0
        for user in test_users:
            print(i)
            results.append(get_precision_and_recall_by_time_split(user, k, algorithm, ip_addresses[i % len(ip_addresses)]))
            i += 1
        precisions = np.mean([i[0] for i in results if i[0] >= 0])
        recalls = np.mean([i[1] for i in results if i[1] >= 0])
        special_precisions = np.mean([i[2] for i in results if i[2] >= 0])
        print(precisions, recalls, special_precisions)


if __name__ == '__main__':
    print(Recommender.data_items.shape)
    print(data_items_train.shape)

    # for ip change get_recommendation param
    precision_recall_at_k([3], Recommender.data['user'].values, ContentBased(data_items_train, PopularityBased(data_items_train)))
    precision_recall_at_k([3], old_users, CFUserUser(data_items_train))
    precision_recall_at_k([3], Recommender.data['user'].values, CFItemItem(data_items_train))
    precision_recall_at_k([3], Recommender.data['user'].values, PopularityBased(data_items_train))
    precision_recall_at_k([3], Recommender.data['user'].values, SVD(data_items_train))
Example #16
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))
import numpy as np
from timeit import default_timer as timer
from math import sqrt
import pickle
import Call_Function as cf
from SVD import SVD
import algo_common_func as ac

#default dir : './ml-100k'
cf.change_dir('../ml-100k')

n_user, n_movie, n_rating = cf.read_user_inform()

svd = SVD(n_user, 100, n_movie, 0, 0.1, 20)

self_instance = svd.get_self_instance()

ac.read_u_data(self_instance, cf.dir_location + '/u.data')

ac.get_overall_mean(self_instance)

RMSE_list = []
start = timer()
print('SVD Start')
for i in range(cf.k_fold):
    start_train = timer()
    svd.gradient_descent(cf.train_set_list[i])
    #print(i+1,'. fold training time : ',timer()-start_train)
    RMSE = svd.predict(cf.test_set_list[i])
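The snippet cuts off inside the k-fold loop; a plausible continuation, given the otherwise-unused `RMSE_list` and `start` above (the aggregation step is an assumption):

    RMSE_list.append(RMSE)
print('mean RMSE over', cf.k_fold, 'folds :', sum(RMSE_list) / len(RMSE_list))
print('total time :', timer() - start)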
Example #17
                                    iniLambda,
                                    device=device,
                                    maxSVDTrainingTimes=70000,
                                    maxBayesianTrainingTimes=64,
                                    lr=1e-4,
                                    pValue=0.65)
    bayesOpt.train(maxK, maxLambda, sampleNumber=500)
    optim = bayesOpt.getBest()
    print(optim)
    k = optim[0]
    r = optim[1]
    ### 98.          0.27777305
    svd = SVD(matrix,
              int(k),
              device=torch.device("cuda"),
              biasSVD=True,
              prediction=True,
              regularization=r,
              trainingTimes=70000,
              learning_rate=1e-4)
    svd.train(verbose=True)
    finalResult = svd.prediction()
    m = finalResult.shape[0]
    n = finalResult.shape[1]
    with open("./predictionMatrix.txt", mode="w") as wh:
        for i in range(m):
            wh.write(colNames[i] + "\t")
            for j in range(n):
                wh.write(str(finalResult[i, j]) + "\t")
            wh.write("\n")
Example #18
data_items = data.drop('user', axis=1)
data_items.columns = [int(x) for x in data_items.columns]
projects_info = pd.read_csv('projects_info.csv', index_col=0)
user_algorithm_mapping_df = pd.read_csv('user_algorithm_mapping.csv')
user_algorithm_mapping = {
    e.user_profile_id: e.algorithm
    for _, e in user_algorithm_mapping_df.iterrows()
}
non_active_projects = pd.read_csv('non_active_projects.csv')
algorithms = [PopularityBased(data_items)]
try:
    algorithms = [
        CFItemItem(data_items),
        CFUserUser(data_items),
        PopularityBased(data_items),
        SVD(data_items),
        Baseline()
    ]
except Exception as e:
    print("Exception in reading algorithms:", e)
    algorithms = [PopularityBased(data_items)] * 5


def get_recommendations(user_profile_id, k, algorithm, ip_address):
    try:
        user_index_place = data[data['user'] == user_profile_id].index
        user_index = user_index_place[0] if len(user_index_place) > 0 else -1
        if user_index == -1 or len(
                get_user_projects(user_index)) < HISTORY_THRES:  # fresh user
            algorithm = PopularityBased(data_items)
        known_user_projects = get_user_projects(user_index)
Example #19
            # generate outputs to hdfs
            temp = pr_vs_count.map(ut.toTSVLine).coalesce(1)
            temp.saveAsTextFile(output_file_path + 'pr_vs_count')

        if graph_statistics.getTotalDeg_vs_PR():
            total_degree_rdd = deg.statistics_compute(D, 'total')
            pr_rdd = pr.statistics_compute(D, Iter, 0.85, debug_mod)
            total_degree_vs_pr_rdd = total_degree_rdd.join(pr_rdd).map(
                lambda x: x[1])

            temp = total_degree_vs_pr_rdd.map(ut.toTSVLine).coalesce(1)
            temp.saveAsTextFile(output_file_path + 'total_degree_vs_pr')

        if graph_statistics.getSVD() == 1:

            svd = SVD()
            x_max = D.map(lambda x: x[0]).max() - 1
            y_max = D.map(lambda x: x[1]).max() - 1
            print(x_max, y_max)

            if x_max > y_max:
                D = D.map(lambda x: (x[1], x[0])).cache()
                x_max, y_max = y_max, x_max

            adj_list = ut.edgelist2Adj(D, x_max, y_max)
            adj_list_rdd = sc.parallelize(adj_list).cache()

            mat = RowMatrix(adj_list_rdd)
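The `RowMatrix` built above feeds Spark's distributed SVD. A self-contained sketch of that step with the MLlib API (k and the toy data are illustrative):

from pyspark.sql import SparkSession
from pyspark.mllib.linalg.distributed import RowMatrix

spark = SparkSession.builder.master("local[1]").appName("svd-sketch").getOrCreate()
rows = spark.sparkContext.parallelize([[1.0, 2.0, 3.0],
                                       [4.0, 5.0, 6.0],
                                       [7.0, 8.0, 9.0]])
mat = RowMatrix(rows)
svd = mat.computeSVD(2, computeU=True)  # top-2 singular values/vectors
print(svd.s)
spark.stop()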
Example #20
class BayesianOptimization(object):
    def __init__(self,
                 matrix,
                 iniK,
                 iniLambda,
                 device=torch.device("cpu"),
                 biasSVD=True,
                 lr=1e-3,
                 pValue=0.75,
                 maxSVDTrainingTimes=50000,
                 maxBayesianTrainingTimes=150):

        ### Zero entries are the positions the model will predict, so add a
        ### small constant to keep genuinely observed entries nonzero.
        self.matrix = np.array(matrix, dtype=np.float32) + 1e-4
        self.m = self.matrix.shape[0]
        self.n = self.matrix.shape[1]
        self.lr = lr
        self.p = pValue
        rand = np.random.rand(self.m, self.n)
        ### 80% as training set and 20% as testing set.
        self.oneMask = np.array(rand >= 0.2, dtype=np.float32)
        self.matrix = self.matrix * self.oneMask
        self.gp_kernel = gp.kernels.RBF() + gp.kernels.WhiteKernel(1e-1)
        self.model = gp.GaussianProcessRegressor(kernel=self.gp_kernel)
        self.k = iniK
        self.lambdaR = iniLambda
        self.maxSVDTrainingTimes = maxSVDTrainingTimes
        self.biasSVD = biasSVD
        self.svd = SVD(self.matrix,
                       self.k,
                       biasSVD=biasSVD,
                       prediction=True,
                       regularization=self.lambdaR,
                       trainingTimes=maxSVDTrainingTimes,
                       device=device,
                       learning_rate=lr)
        self.device = device
        self.trainingTimes = maxBayesianTrainingTimes
        self.X = []
        self.y = []
        self.est = []
        self.predict = []

    def maxAcquisition(self, Xnew):
        Xnew = np.array(Xnew)
        fMaxT_1 = np.max(self.y)
        yHatMean, std = self.model.predict(Xnew, return_std=True)
        # print(yHatMean)
        # print(std)
        mu = np.reshape(yHatMean, [len(yHatMean)])
        scores = (mu - fMaxT_1) * norm.cdf(
            (mu - fMaxT_1) / (std + 1e-9)) + std * norm.pdf(
                (mu - fMaxT_1) / (std + 1e-9))
        ix = np.argmax(scores)
        self.predict.append(mu[ix])
        return Xnew[ix, :]

    def estimator_Calculate(self):
        print("Train Current SVD.")
        self.svd.train(verbose=True)
        zeroMask = np.array(self.matrix == 0, dtype=np.float32)
        mse = np.sum(np.abs(self.matrix - self.svd.prediction()) * zeroMask)
        print("Estimator is : ", 10000. / (mse + 1e-9))
        self.est.append(mse)
        return 10000. / (mse + 1e-9)

    @staticmethod
    def randomSearchK(minBoundary, maxBoundary):
        k = int((maxBoundary - minBoundary) * np.random.rand() + minBoundary)
        return k

    @staticmethod
    def randomSearchLambda(minBoundary, maxBoundary):
        l = (maxBoundary - minBoundary) * np.random.rand() + minBoundary
        return l

    def train(self, maxK, maxLambda, sampleNumber=200):
        """
        :param maxK: must larger than 1
        :param maxLambda: must larger than 0
        :param sampleNumber : sample number in one sampling
        :return:
        """
        print("Bayesian training start.")
        self.X.append([self.k, self.lambdaR])
        self.y.append(self.estimator_Calculate())
        self.model = self.model.fit(X=np.array(self.X), y=np.array(self.y))
        for t in range(self.trainingTimes):
            print("#################")
            print("It is at " + str(t) + " training.")
            Xsamples = [[
                self.randomSearchK(1, maxK),
                self.randomSearchLambda(0., maxLambda)
            ] for _ in range(sampleNumber)]
            #print(Xsamples)
            p = np.random.rand(1)
            print("P value : ", p)
            if p <= self.p:
                nextX = self.maxAcquisition(Xsamples)
            else:
                index = int(np.random.rand(1) * len(Xsamples))
                nextX = Xsamples[index]
            self.k = nextX[0]
            print("The next k value is : ", self.k)
            self.lambdaR = nextX[1]
            print("The next lambda value is : ", self.lambdaR)
            self.svd = SVD(self.matrix,
                           int(self.k),
                           biasSVD=self.biasSVD,
                           prediction=True,
                           regularization=self.lambdaR,
                           trainingTimes=self.maxSVDTrainingTimes,
                           device=self.device,
                           learning_rate=self.lr)
            actual = self.estimator_Calculate()
            self.X.append([self.k, self.lambdaR])
            self.y.append(actual)
            print(np.array(self.X))
            print(np.array(self.y))
            self.model = self.model.fit(np.array(self.X), y=np.array(self.y))

    def returnInfor(self):
        return self.X, self.y, self.est, self.predict

    def getBest(self):
        self.X = np.array(self.X)
        self.y = np.array(self.y)
        ix = np.argmax(self.y)
        return self.X[ix, :]
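Hypothetical usage of the class above (the ratings matrix and bounds are illustrative; the project's torch-backed `SVD` class must be importable):

import numpy as np

matrix = np.random.rand(50, 40) * 5
bayesOpt = BayesianOptimization(matrix, iniK=10, iniLambda=0.1)
bayesOpt.train(maxK=100, maxLambda=1.0, sampleNumber=200)
print(bayesOpt.getBest())      # best (k, lambda) pair found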
Example #21

        84.937 / 100,
        72.927 / 100,
        41.473 / 100,
        9.945 / 100,
    ]
)
print(g)
s = (n, n)
AM = np.zeros(s)
for i in range(0, n):
    for j in range(0, n):
        search = open(lambdaarray[i])
        for line in search:
            if line.split()[0] == xarray[j]:
                AM[i, j] = float(line.split()[1]) * 1.0 / n
svdclass = SVD(n)
f_tsvd = svdclass.f_tsvd(3, AM, g.T)
f_tikhonov = svdclass.f_tikhonov(0.4, AM, g.T)
lcurvex, lcurvey = svdclass.lcurve(AM, g.T)
ginverse_tsvd = np.dot(AM, f_tsvd)
ginverse_tikhonov = np.dot(AM, f_tikhonov)
x = np.arange(0, n)
utb, utbs = svdclass.picardparameter(AM, g.T)
U, s, V = svdclass.svdmatrix(AM)


########collection coefficient from modeling########
iqescr = 0.9
zmo = 2300.0
leff = 920.0
smooverd = 4.3e-4
Example #22
 def gettikhonov_x(self):
     from SVD import SVD
     svd = SVD(size)  # `size` comes from the enclosing scope
     f_tikhonov = svd.f_tikhonov(self.lam, self.AM, self.b.T)
     return f_tikhonov
Example #23
        rawcount = 0
        sumAM = 0.0
        for rawobject in rawAMarray:
            if rawcount > 135 * k and rawcount < 135 * (k + 1):
                sumAM = sumAM + rawobject
            rawcount = rawcount + 1
        sumAM = sumAM / 135.0
        AMarray = np.append(AMarray, sumAM)
    for j in range(0, n):
        #AMarray = AMarray[::-1]
        AM[i, j] = AMarray[j] / n
print(AM)
noise = np.random.normal(0, 1, (n, n))
#AM = AM +noise/100
#TSVD method
svdclass = SVD(n)
f_tsvd = svdclass.f_tsvd(4, AM, g.T)
f_tikhonov = svdclass.f_tikhonov(0.05, AM, g.T)
utb, utbs = svdclass.picardparameter(AM, g.T)


from pylab import *
x = np.arange(0, 17)

ax1 = subplot(111)
#ax1.set_yscale('log')
#ax.set_xscale('log')
ax1.scatter(x / 17.0, f_tsvd, marker='o', label='TSVD Regularization', color='black')
#ax1.scatter(x, iqeyy, marker='o', label='tkhonov', color='red')
#ax1.scatter(x, f_tsvd, marker='o', label='tkhonov', color='green')
ccx = np.arange(0, 2300)
Example #24
        rawcount = 0
        sumAM = 0.0
        for rawobject in rawAMarray:
            if rawcount > 135 * k and rawcount < 135 * (k + 1):
                sumAM = sumAM + rawobject
            rawcount = rawcount + 1
        sumAM = sumAM / 135.0
        AMarray = np.append(AMarray, sumAM)
    for j in range(0, n):
        #AMarray = AMarray[::-1]
        AM[i, j] = AMarray[j] / n
print(AM)
#noise = np.random.normal(0,1,(n,n))
#AM = AM +noise/1000
#TSVD method
svdclass = SVD(n)
g = iqewl
f_tsvd = svdclass.f_tsvd(3, AM, g.T)
f_tikhonov = svdclass.f_tikhonov(0.05, AM, g.T)
utb, utbs = svdclass.picardparameter(AM, g.T)
U, s, V = svdclass.svdmatrix(AM)

from pylab import *
x = np.arange(0, 17)

ax1 = subplot(111)
#ax1.set_yscale('log')
#ax.set_xscale('log')
ax1.scatter(x / 17.0,
            f_tsvd,
            marker='o',
Example #25
 def gettikhonov_x(self):
     from SVD import SVD
     svd = SVD(size)  # `size` comes from the enclosing scope
     f_tikhonov = svd.f_tikhonov(self.lam, self.AM, self.b.T)
     return f_tikhonov
Example #26
        if i < 8:
            fexact = np.append(fexact, 2.0)
        else:
            fexact = np.append(fexact, 1.0)

    b = np.dot(AM, fexact)
    ##############################################
    for i in range(0, size):
        for j in range(0, size):
            s = 1.0 / size * (j + 0.5)
            t = 1.0 / size * (i + 0.5)
            AM[i][j] = 1.0 / size * 0.27 / pow(
                pow(0.25, 2.0) + pow(s - t, 2.0), 1.5)

    from SVD import SVD
    svd = SVD(size)
    f_tikhonov = svd.f_tikhonov(0.00001, AM, b.T)
    fvalue = 0.0
    for f_tikobj in f_tikhonov:
        fvalue += f_tikobj * f_tikobj

    fvalue1 = 0.0
    bre = np.dot(AM, f_tikhonov)
    bdif = bre - b
    for bdifobj in bdif:
        fvalue1 += bdifobj * bdifobj

    optimumarray = np.append(optimumarray, fvalue)

print(optimumarray)
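`fvalue` and `fvalue1` above are the squared solution norm ‖f‖² and the squared residual ‖Af − b‖², i.e. the two axes of an L-curve. A self-contained sweep over lambda on the same kind of kernel (the grid size is illustrative):

import numpy as np

size = 16
s_grid = (np.arange(size) + 0.5) / size
AM = 1.0 / size * 0.27 / (0.25**2 + (s_grid[None, :] - s_grid[:, None])**2)**1.5
fexact = np.where(np.arange(size) < 8, 2.0, 1.0)
b = AM @ fexact

U, s, Vt = np.linalg.svd(AM)
for lam in (1e-5, 1e-3, 1e-1):
    filt = s / (s**2 + lam**2)              # Tikhonov filter factors
    f = Vt.T @ (filt * (U.T @ b))
    print(lam, f @ f, np.sum((AM @ f - b)**2))   # ||f||^2, ||Af-b||^2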