Example #1
def main(args):
    (training_file, label_file, test_file, test_label, u_file, e, c, output_file, components) = args
    X_training = load_feat(training_file)
    n = len(X_training)
    U = load_feat(u_file)
    y_training = [float(line.strip()) for line in open(label_file)]
   
    U = np.asarray(U)
    X_training = np.asarray(X_training)
    #X = preprocessing.normalize(X, norm='l2')
    y_training = np.asarray(y_training)
    
    X_test = load_feat(test_file)
    y_test = [float(line.strip()) for line in open(test_label)]
    X_test = np.asarray(X_test)
    X_test[np.isnan(X_test)] = 0.0
    #test_X = preprocessing.normalize(test_X, norm='l2')
    y_test = np.asarray(y_test)
    s = min(len(X_training), len(U))

    
    # cast in case the args arrive as strings (e.g. from sys.argv)
    cca = CCA(n_components=int(components), max_iter=50)
    (X_cca, U_cca) = cca.fit_transform(X_training[:s], U[:s])
    X_test_cca = cca.transform(X_test)
    
    svr = SVR(C=float(c), epsilon=float(e), kernel='rbf')
    svr.fit(X_cca, y_training[:s])    
    pred = svr.predict(X_test_cca)
    
 
    with open(output_file, 'w') as output:
        for p in pred:
            print >>output, p
    return
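
Both main snippets on this page call a load_feat helper that is not shown; a minimal sketch, assuming one sample per line of whitespace-separated numeric features:

def load_feat(path):
    # Hypothetical helper: parse one sample per line, whitespace-separated floats.
    with open(path) as feat_file:
        return [[float(tok) for tok in line.split()] for line in feat_file]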
def CCA_transform(train_feature, train_label, test_feature, n_components):
    """ CCA: Canonical Correlation Analysis
    """
    from sklearn.cross_decomposition import CCA
    cca = CCA(n_components).fit(train_feature, train_label)
    
    train_feature_transformed = cca.transform(train_feature)
    test_feature_transformed = cca.transform(test_feature)
    
    return train_feature_transformed, test_feature_transformed
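
A quick usage sketch for CCA_transform with random data (shapes are illustrative):

import numpy as np
X_tr = np.random.randn(100, 20)
Y_tr = np.random.randn(100, 5)
X_te = np.random.randn(30, 20)
X_tr_c, X_te_c = CCA_transform(X_tr, Y_tr, X_te, n_components=3)
print X_tr_c.shape, X_te_c.shape   # (100, 3) and (30, 3)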
Example #3
def canonical_approach():
    from sklearn.cross_decomposition import CCA

    (X, Y), cities = pull_xy_data()

    cca = CCA(n_components=2)
    cca.fit(X, Y)

    ccaX, ccaY = cca.transform(X, Y)

    plot(ccaX, cities, ["CC01", "CC02", "CC03"], 1)

    return "OK What Now?"
    def __init__(self, dataset, n=None, tol=1e-4):
        if n is None:
            n = int(numpy.ceil(numpy.sqrt(len(dataset.attributes))))

        self.dataset = dataset
        self.attributes = random.sample(dataset.attributes, n)

        cca = CCA(n_components=1, tol=tol)
        cca.fit(
            dataset.X.take([a.index for a in self.attributes], 1),
            dataset.y)

        self.linear_combination = LinearCombination(
            self.attributes,
            cca.x_weights_.transpose()[0])
Example #5
def cca_for_ssvep(input_data, sampling_rate, compared_frequencies):

    # TODO: Strict input checks and exceptions, to avoid crashes and processing errors

    # Pre-allocate SSVEP signals matrix to be compared with original EEG recordings using CCA
    number_time_points = input_data.shape[1]
    number_harmonics = 2
    cca_base_signal_matrix = [[] for loop_var in compared_frequencies]

    # Pre-allocate output: one correlation coefficient (Rho) for each target SSVEP frequency
    # Note: a single row of Rho scores, one per target SSVEP frequency
    cca_rho_values = numpy.zeros([1, len(compared_frequencies)], dtype='float')

    # For each target frequency, fill Y matrix with sine and cosine signals for every harmonic
    for loop_frequencies in range(len(compared_frequencies)):

        # For this current SSVEP frequency, pre-allocate the harmonics matrix
        cca_base_signal_matrix[loop_frequencies] = numpy.zeros([number_harmonics * 2, number_time_points])
        time_points_count = numpy.arange(number_time_points, dtype='float')
        time_points_count = time_points_count / sampling_rate

        # Generate sine and cosine reference signals, for every harmonic
        for loop_harmonics in range(number_harmonics):

            # Compute the reference signals for current harmonic
            base_constant = 2 * numpy.pi * (loop_harmonics + 1) * compared_frequencies[loop_frequencies]
            base_sine_signal = numpy.sin((base_constant * time_points_count))
            base_cosine_signal = numpy.cos((base_constant * time_points_count))

            # Copy signals back to reference matrix
            base_position = loop_harmonics + 1
            sine_position = (2 * (base_position - 1) + 1)
            cosine_position = 2 * base_position
            cca_base_signal_matrix[loop_frequencies][sine_position - 1, :] = base_sine_signal
            cca_base_signal_matrix[loop_frequencies][cosine_position - 1, :] = base_cosine_signal

        # After the loop, extract the y_matrix from reference matrix for current SSVEP frequency
        y_matrix = cca_base_signal_matrix[loop_frequencies]

        # Create a CCA object and compute the correlation score
        cca_object = CCA(n_components=number_harmonics)
        cca_object.fit(numpy.transpose(input_data), numpy.transpose(y_matrix))
        values_x, values_y = cca_object.transform(numpy.transpose(input_data), numpy.transpose(y_matrix))
        # Rho = correlation of the first pair of canonical variates
        cca_rho_values[0, loop_frequencies] = numpy.corrcoef(values_x[:, 0], values_y[:, 0])[0, 1]

    # After loop return and exit
    return cca_rho_values
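
A minimal usage sketch with synthetic EEG (channel count, duration, and target frequencies are illustrative assumptions):

fs = 250.0
t = numpy.arange(500, dtype='float') / fs        # 2 s at 250 Hz
eeg = numpy.random.randn(8, t.size)              # 8 channels x time points
eeg += numpy.sin(2 * numpy.pi * 10.0 * t)        # embed a 10 Hz SSVEP
rhos = cca_for_ssvep(eeg, fs, [8.0, 10.0, 12.0])
print rhos   # the 10 Hz entry should dominate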
def fit_CCA(tr_block,data_builder):
    '''Fit a CCA on features sampled at roughly 100 random pixel points.
    '''
    # train on number of points
    num_points = 100
    PixelPoints = data_builder.sample_random_pixels()
    points_array_ipw = []
    points_array_refl = []
    for yr in [14,15]:
        doy_strings = data_builder.club_days(tr_block[tr_block[0][:,1] == yr])
        days_in_sorted = doy_strings.keys()
        days_in_sorted.sort()
        ipw_files,refl_files = data_builder.sort_IPW_refl_files_imgs(yr)
        for set_ in days_in_sorted:
            print 'Building data set for year: %d and string of days %s'%(yr,set_)
            # Get the required files only
            temp_ipw_files = filter(lambda x: re.findall('\d+',x)[1] in doy_strings[set_],ipw_files)
            temp_refl_files = filter(lambda x: re.findall('\d+',x)[1] in doy_strings[set_],refl_files)
            temp_ipw_files = map(lambda x: '../data/dataset/20' + str(yr) + os.sep + x,temp_ipw_files)
            temp_refl_files = map(lambda x: '../data/dataset/20' + str(yr) + os.sep + x,temp_refl_files)
            for x_,y_ in zip(PixelPoints[:num_points,0],PixelPoints[:num_points,1]):
                temp_array = data_builder.build_features_and_truth_imgs(temp_ipw_files,temp_refl_files,x_,y_)
                points_array_ipw.append(temp_array[1])
                points_array_refl.append(temp_array[2])
    X_ = np.vstack(points_array_ipw)
    Y_ = np.vstack(points_array_refl)
    mdl = CCA(n_components = 10)
    print 'Fitting a CCA...'
    mdl.fit(X_[:,:1089],Y_[:,:1089])
    ipw_frames = X_[:,2178:-1]
    refl_frames = Y_[:,2178:]
    del X_
    del Y_
    ipw_frames = ipw_frames[~np.any(np.isnan(ipw_frames),axis = 1),:]
    refl_frames = refl_frames[~np.any(np.isnan(refl_frames),axis = 1),:]
    
#    indices = [(x*1089,(x+1)*1089)for x in range(4) ]
#    # the number of components times 4
#    ipw_refl_fusion = np.zeros((ipw_frames.shape[0],80))
    print 'Building the feature fusion..'
    
    return mdl
def mainExec(name_file1, name_file2, features1, features2):
    '''
    Given two files with names, and two files with features, perform the Stacked Auxiliary Embedding method
    on two matrices. The first one is the concatenation of both feature lists, the second matrix contains tf-idf weighted
    representations of the training sentences of Flickr30kEntities. The intermediate CCA model is written to disk,
    as well as the final model
    :param name_file1
    :param name_file2
    :param features1
    :param features2
    '''
    print "Creating vocabulary"
    voc = readVocabulary()
    print "Generating document vectors"
    occurrenceVectors, idf = createOccurrenceVectors(voc)
    print "Weighing vectors"
    weightedVectors = weight_tfidf(occurrenceVectors, idf)
    print "creating feature dictionary"
    featuresDict = createFeatDict(weightedVectors.keys(), name_file1, name_file2, features1, features2 )
    imagematrix, sentenceMatrix = createSnippetMatrices(featuresDict, weightedVectors)

    print "Modelling cca"
    cca = CCA(n_components = 128)
    cca = fitCCA(cca, imagematrix, sentenceMatrix, "ccasnippetmodel.p")

    trainingimages, trainingsentences = createTrainMatrices(voc)
    trans_img, trans_sent = cca.transform(trainingimages, trainingsentences)

    nn_img = nearest_neighbor(trainingimages)
    nn_sent = nearest_neighbor(trainingsentences)
    print "NN Image: " + str(nn_img)
    print "NN Sentence: " + str(nn_sent)
    augmented_imgs, augmented_sentences = augmentMatrices(nn_img, nn_sent, trainingimages, trainingsentences, trans_img,
                                                          trans_sent)
    print "Fitting augmented CCA model"
    augmentedcca = CCA(n_components=96)
    augmentedcca = fitCCA(augmentedcca, augmented_imgs, augmented_sentences, "augmentedcca.p")
    print "Writing the model to disk"

    resultingModel = StackedCCAModel(nn_img, nn_sent, cca, augmentedcca)

    pickle.dump(resultingModel, open("completestackedCCAModel.p", 'w+'))
Example #8
def main(args):
    (training_file, label_file, test_file, test_label, u_file) = args
    X_training = load_feat(training_file)
    n = len(X_training)
    U = load_feat(u_file)
    y_training = [int(line.strip()) for line in open(label_file)]
   
    U = np.asarray(U)
    X_training = np.asarray(X_training)
    #X = preprocessing.normalize(X, norm='l2')
    y_training = np.asarray(y_training)
    
    X_test = load_feat(test_file)
    y_test = [int(line.strip()) for line in open(test_label)]
    X_test = np.asarray(X_test)
    #test_X = preprocessing.normalize(test_X, norm='l2')
    y_test = np.asarray(y_test)

    
    cca = CCA(n_components=100)
    (X_cca, U_cca) = cca.fit_transform(X_training, U[:n])
    X_test_cca = cca.transform(X_test)
    
    svr = SVC()
    svr.fit(X_cca, y_training)    
    pred = svr.predict(X_test_cca)
    
    print pred
    print y_test
    print accuracy_score(y_test, pred)
    with open(test_file + '.cca.2.pred', 'w') as output:
        for p in pred:
            print >>output, p
    #svm_model.fit(X, y)
    #pickle.dump(lr, open(model_file, "wb"))
    return
Example #9
def test_cca_implementation():
    X = np.random.multivariate_normal(np.random.randint(50,100,(10)).astype('float'),np.identity(10),200)
    Y = np.random.multivariate_normal(np.random.randint(80,200,(6)).astype('float'),np.identity(6),200)

    X_test = np.random.multivariate_normal(np.random.randint(50,100,(10)).astype('float'),np.identity(10),20)
    Y_test = np.random.multivariate_normal(np.random.randint(50,100,(6)).astype('float'),np.identity(6),20)
    
    mdl_test = CCA(n_components = 6)
    mdl_test.fit(X,Y)
    
    Y_pred = mdl_test.predict(X)
    
    print Y_pred
    print '-'*50
#    print Y_test

    from sklearn.cross_decomposition import CCA as CCA_sklearn
    
    mdl_actual = CCA_sklearn(n_components = 6)
    mdl_actual.fit(X,Y)
    
    print '-'*50
    Y_actual = mdl_actual.predict(X)
    print Y_actual
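
    # Added sketch: a tighter check than eyeballing the prints -- the mean
    # absolute gap should be near zero if the custom CCA follows sklearn's
    # conventions.
    print '-'*50
    print 'mean |custom - sklearn| =', np.mean(np.abs(np.asarray(Y_pred) - Y_actual))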
Example #10
n = 500

# 2 latents vars:
l1 = np.random.normal(size=n)
l2 = np.random.normal(size=n)

latents = np.array([l1, l1, l2, l2]).T
X = latents + np.random.normal(size=4 * n).reshape((n, 4))
Y = latents + np.random.normal(size=4 * n).reshape((n, 4))

###############################################################################
# Compare the projection on first component of CCA, kernel CCA
# with linear kernel, polynomial kernel and rbf kernel

cca = CCA(n_components=1)
cca.fit(X, Y)
r_cca = np.corrcoef(cca.x_scores_.T, cca.y_scores_.T)[0, 1]

# linear kernel CCA
kcca1 = KernelCCA(kernel="linear", n_components=1, kapa=0.1,
                  eta=0.1, pgso=True, center=True)
kcca1.fit(X, Y)
kx_linear_scores = np.dot(kcca1.KXc_, kcca1.alphas_)
ky_linear_scores = np.dot(kcca1.KYc_, kcca1.betas_)

# polynomial kernel CCA
kcca2 = KernelCCA(kernel="poly", n_components=1, kapa=0.1,
                  eta=0.1, pgso=True, center=True, coef0=0.1)
kcca2.fit(X, Y)
kx_poly_scores = np.dot(kcca2.KXc_, kcca2.alphas_)
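
# The header comment above also promises an rbf comparison; a sketch that
# mirrors the two calls above (KernelCCA is this snippet's own class, so the
# constructor arguments are assumed to carry over unchanged)
kcca3 = KernelCCA(kernel="rbf", n_components=1, kapa=0.1,
                  eta=0.1, pgso=True, center=True)
kcca3.fit(X, Y)
kx_rbf_scores = np.dot(kcca3.KXc_, kcca3.alphas_)
ky_rbf_scores = np.dot(kcca3.KYc_, kcca3.betas_)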
Example #11
    X, good_idx = remove_outliers(X, 6.0)
    y = y.ix[y.index[good_idx]]

    # sanity check
    # idx = np.random.permutation(len(y))[0]
    # idx = np.where(y.index == 119384)[0][0]
    # image_sanity_check(y.index[idx], X[idx])

    # only keep unique values
    unique_cols = ['Class1.1', 'Class1.2', 'Class2.1', 'Class3.1', 'Class4.1', 'Class5.1', 'Class5.2', 'Class5.3',
                   'Class6.1', 'Class7.1', 'Class7.2', 'Class8.1', 'Class8.2', 'Class8.3', 'Class8.4', 'Class8.5',
                   'Class8.6', 'Class9.1', 'Class9.2', 'Class10.1', 'Class10.2', 'Class11.1', 'Class11.2',
                   'Class11.3', 'Class11.4', 'Class11.5']

    # do CCA
    if verbose:
        print 'Doing CCA...'
    cca = CCA(n_components=len(unique_cols), copy=False)
    X_cca, y_cca = cca.fit_transform(X, y[unique_cols].values.astype(np.float32))

    cPickle.dump(cca, open(base_dir + 'data/CCA_DCT.pickle', 'wb'))

    # make plots
    make_cca_images(cca, (100, 100), dct_idx=dct_idx)
    fig = plot_cca_projections(X_cca)
    fig.savefig(plot_dir + 'CCA_dist_no_outliers.png')
    if doshow:
        plt.show()

    print 'Saving the transformed values...'
    np.save(base_dir + 'data/CCA_training_transform', X_cca)
Y = np.dot(X, B) + np.random.normal(size=n * q).reshape((n, q)) + 5

pls2 = PLSRegression(n_components=3)
pls2.fit(X, Y)
print("True B (such that: Y = XB + Err)")
print(B)
# compare pls2.coef_ with B
print("Estimated B")
print(np.round(pls2.coef_, 1))
pls2.predict(X)

# PLS regression, with univariate response, a.k.a. PLS1

n = 1000
p = 10
X = np.random.normal(size=n * p).reshape((n, p))
y = X[:, 0] + 2 * X[:, 1] + np.random.normal(size=n * 1) + 5
pls1 = PLSRegression(n_components=3)
pls1.fit(X, y)
# note that the number of components exceeds 1 (the dimension of y)
print("Estimated betas")
print(np.round(pls1.coef_, 1))

# #############################################################################
# CCA (PLS mode B with symmetric deflation)

cca = CCA(n_components=2)
cca.fit(X_train, Y_train)
X_train_r, Y_train_r = cca.transform(X_train, Y_train)
X_test_r, Y_test_r = cca.transform(X_test, Y_test)
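
# The doc-derived excerpts above and below use X_train/Y_train/X_test/Y_test
# without defining them; in sklearn's cross-decomposition example they come
# from a half/half split (a sketch, assuming the X, Y, and n defined earlier):
X_train, Y_train = X[:n // 2], Y[:n // 2]
X_test, Y_test = X[n // 2:], Y[n // 2:]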
pls2 = PLSRegression(n_components=3)
pls2.fit(X, Y)
print("True B (such that: Y = XB + Err)")
print(B)
# compare pls2.coef_ with B
print("Estimated B")
print(np.round(pls2.coef_, 1))
pls2.predict(X)

###############################################################################
# PLS regression, with univariate response, a.k.a. PLS1

n = 1000
p = 10
X = np.random.normal(size=n * p).reshape((n, p))
y = X[:, 0] + 2 * X[:, 1] + np.random.normal(size=n * 1) + 5
pls1 = PLSRegression(n_components=3)
pls1.fit(X, y)
# note that the number of components exceeds 1 (the dimension of y)
print("Estimated betas")
print(np.round(pls1.coef_, 1))

###############################################################################
# CCA (PLS mode B with symmetric deflation)

cca = CCA(n_components=2)
cca.fit(X_train, Y_train)
X_train_r, Y_train_r = cca.transform(X_train, Y_train)
X_test_r, Y_test_r = cca.transform(X_test, Y_test)
Example #14
	# session.execute("CREATE KEYSPACE IF NOT EXISTS TweetsXiaohu WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 3 };")
	session.execute("USE TweetsXiaohu")
	# session.execute("DROP TABLE IF EXISTS Tweet")
	rows = session.execute("SELECT text, hashtags FROM Tweet limit 1000")
	X, Y = [], []
	for row in rows:
		X.append(row.text)
		Y.append([x.lower() for x in row.hashtags])
	vectorizer = StemmedTfidfVectorizer(min_df=1, stop_words='english', decode_error='ignore')
	# print(vectorizer)

	X = vectorizer.fit_transform(X).toarray()
	# print '40', X
	# print type(X)
	Y_indicator = LabelBinarizer().fit(Y).transform(Y)
	cca = CCA(n_components = 100, max_iter=10)
	cca.fit(X, Y_indicator)
	X = cca.transform(X)
	# print '45', X
	# print type(X)
	classif = OneVsRestClassifier(SVC(kernel='linear'))
	classif.fit(X, Y)

	for row in rows:
		# row = rows[0]
		# print vectorizer.transform([row.text]).toarray()
		# print cca.predict(vectorizer.transform([row.text]).toarray())
		transformed = vectorizer.transform([row.text]).toarray()
		# print '55', transformed
		ccad = cca.transform(transformed)
		# print '57', ccad
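		# Added sketch: presumably the CCA-transformed vector feeds the
		# one-vs-rest classifier trained above to suggest hashtags
		print row.text, classif.predict(ccad)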
def main():
    sess = tf.InteractiveSession()

    X1_data, X2_data, Y_data, baseline_data, labels_data = read_inputs()
    
    # set up the DCCA network
    keep_input = tf.placeholder("float")
    keep_hidden = tf.placeholder("float")
    X1_in, X1_out = build_network(273, 1500, 1500, 1500, 50, keep_input, keep_hidden)
    X2_in, X2_out = build_network(112, 1500, 1500, 1500, 50, keep_input, keep_hidden)

    # define the DCCA cost function
    U = tf.placeholder("float", [50, 40])
    V = tf.placeholder("float", [50, 40])
    UtF = tf.matmul(tf.transpose(U), tf.transpose(X1_out))
    GtV = tf.matmul(X2_out, V)
    canon_corr = tf.mul(1./BATCH, tf.reduce_sum(tf.mul(tf.matmul(UtF, GtV), tf.constant(np.eye(40), dtype = tf.float32))))

    corr_step = tf.train.AdamOptimizer(1e-6).minimize(- canon_corr)

    sess.run(tf.initialize_all_variables())

    # train the network
    print "Training DCCA"
    for i in range(0, EPOCHS):
        for j in range(0, len(X1_data.train), int(BATCH)):
            X1_in_batch = X1_data.train[j:(j + BATCH)]
            X2_in_batch = X2_data.train[j:(j + BATCH)]

            X1_out_batch = X1_out.eval(feed_dict = {
                X1_in : X1_in_batch,
                keep_input : 1.0,
                keep_hidden : 1.0})
            X2_out_batch = X2_out.eval(feed_dict = {
                X2_in : X2_in_batch,
                keep_input : 1.0,
                keep_hidden : 1.0})

            # compute CCA on the output layers
            cca = CCA(n_components = 40)
            cca.fit(X1_out_batch, X2_out_batch)
            U_batch = cca.x_weights_
            V_batch = cca.y_weights_

            # perform gradient step
            corr_step.run(feed_dict = {
                X1_in : X1_in_batch,
                X2_in : X2_in_batch,
                U : U_batch,
                V : V_batch,
                keep_input : 0.9,
                keep_hidden : 0.8})

            # print useful info
            print "EPOCH", i, "/ COST", canon_corr.eval(feed_dict = {
                X1_in : X1_in_batch,
                X2_in : X2_in_batch,
                U : U_batch,
                V : V_batch,
                keep_input : 1.0,
                keep_hidden : 1.0})

    # train the softmax classifier
    print "Training softmax"
    W_s = weight_variable([89, 39])
    b_s = bias_variable([39])
    baseline = tf.placeholder("float", [None, 39])
    y_true = tf.placeholder("float", [None, 39])

    # define the cost
    X1_baseline_combo = tf.concat(1, [X1_out, baseline])
    y_pred = tf.nn.softmax(tf.matmul(X1_baseline_combo, W_s) + b_s)
    lr_cost = - tf.reduce_sum(y_true * tf.log(tf.clip_by_value(y_pred, 1e-10, 1.0)))
    lr_step = tf.train.AdamOptimizer(1e-4).minimize(lr_cost)

    # set up accuracy checking
    correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    sess.run(tf.initialize_all_variables())

    for i in range(0, EPOCHS):
        for j in range(0, len(X1_data.train), int(BATCH)):
            lr_step.run(feed_dict = {
                X1_in : X1_data.train[j:(j + BATCH)],
                y_true : Y_data.train[j:(j + BATCH)],
                baseline : baseline_data.train[j:(j + BATCH)],
                keep_input : 1.0,
                keep_hidden : 1.0})

        print i, accuracy.eval(feed_dict = {
            X1_in : X1_data.dev,
            y_true : Y_data.dev,
            baseline : baseline_data.dev,
            keep_input : 1.0,
            keep_hidden : 1.0})

    print "Test accuracy:", accuracy.eval(feed_dict = {
        X1_in : X1_data.test,
        y_true : Y_data.test,
        baseline : baseline_data.test,
        keep_input : 1.0,
        keep_hidden : 1.0})

    # project the data and print it to file
    X1_train_proj = X1_baseline_combo.eval(feed_dict = {
        X1_in : X1_data.train,
        baseline : baseline_data.train,
        keep_input : 1.0,
        keep_hidden : 1.0})

    X1_dev_proj = X1_baseline_combo.eval(feed_dict = {
        X1_in : X1_data.dev,
        baseline : baseline_data.dev,
        keep_input : 1.0,
        keep_hidden : 1.0})

    X1_test_proj = X1_baseline_combo.eval(feed_dict = {
        X1_in : X1_data.test,
        baseline : baseline_data.test,
        keep_input : 1.0,
        keep_hidden : 1.0})

    scipy.io.savemat("dcca_projected_data.mat", {'dataTr' : X1_train_proj, "PhonesTr" : labels_data.train, "dataDev" : X1_dev_proj, "PhonesDev" : labels_data.dev, "dataTest" : X1_test_proj, "PhonesTest" : labels_data.test})
Example #16
    dir_name = configuration.output_parameters['path']

    if not os.path.isdir(dir_name):
        os.makedirs(dir_name)

    OutputLog().set_path(dir_name)
    OutputLog().set_verbosity(configuration.output_parameters['verbosity'])

    data_config = ConfigParser.ConfigParser()
    data_config.read(data_set_config)
    data_parameters = ConfigSectionMap("dataset_parameters", data_config)

    # construct data set
    data_set = Container().create(data_parameters['name'], data_parameters)

    cca_model = CCA(n_components=top, scale=True, copy=False)

    train_transformed_x, train_transformed_y = cca_model.fit_transform(data_set.trainset[0], data_set.trainset[1])
    test_transformed_x, test_transformed_y = cca_model.transform(data_set.testset[0], data_set.testset[1])

    OutputLog().write('test results:')
    correlations, trace_correlation, var, x_test, y_test, test_best_layer = TraceCorrelationTester(
        data_set.testset[0],
        data_set.testset[1], top).test(IdentityTransformer(), configuration.hyper_parameters)

    OutputLog().write('train results:')
    correlations, train_trace_correlation, var, x_train, y_train, train_best_layer = TraceCorrelationTester(
        data_set.trainset[0],
        data_set.trainset[1], top).test(IdentityTransformer(), configuration.hyper_parameters)

    OutputLog().write('\nTest results : \n')
Example #17
# check type of array
#print(np.dtype(data_selection))

# force dtype = float32
data_selection = data_selection.astype(np.float32, copy=False)

# complete cases
data_selection = data_selection[~np.isnan(data_selection).any(axis=1)]
data_selection = data_selection[np.isfinite(data_selection).all(axis=1)]

# target variable / covariates
y = data_selection[:,0:3]
x = data_selection[:,4:]

# split test-train
x_train, x_test, y_train, y_test = cross_validation.train_test_split(x,y, test_size=0.2, random_state=0)


cca = CCA(n_components=1,scale=True)
cca.fit(x_train, y_train)
#CCA(copy=True, max_iter=500, n_components=1, scale=True, tol=1e-06),
X_train_r, Y_train_r = cca.transform(x_train,y_train)
X_test_r, Y_test_r = cca.transform(x_test, y_test)

print(type(X_train_r))
print(np.shape(X_train_r))
print(np.shape(Y_train_r))
print(np.shape(x))

print(np.corrcoef(X_train_r[:,0],Y_train_r[:,0]))
print(np.corrcoef(X_test_r[:,0],Y_test_r[:,0]))
Example #18
    #%% Plot accuracies for PLS Regression
    plt.figure()
    for i in range (5):
        plt.plot(nComponents,plsRegScores[i,:],lw=3)

    plt.xlim(1,np.amax(nComponents))
    plt.title('PLS Regression accuracy')
    plt.xlabel('Number of components')
    plt.ylabel('accuracy')
    plt.legend (['LR','LDA','GNB','Linear SVM','rbf SVM'],loc='lower right')
    plt.grid(True)

if (0):
    #%% Canonical Correlation Analysis
    nComponents = np.arange(1,nClasses +1)
    cca = CCA(n_components=nClasses)
    cca.fit(Xtrain,Ytrain)
    XtrainT = cca.transform(Xtrain)
    XtestT = cca.transform(Xtest)
    ccaScores = np.zeros((5, len(nComponents)))
    for i,n in enumerate(nComponents):
        ccaScores[:,i] = util.classify(XtrainT[:,0:n],XtestT[:,0:n],labelsTrain,labelsTest)
    
    cca = CCA(n_components=3)
    cca.fit(Xtrain,Ytrain)
    xt = cca.transform(Xtrain)
    fig = plt.figure()
    util.plotData(fig,xt,labelsTrain,classColors)
    plt.title('First 3 components of projected data')
    
Example #19
__author__ = 'cancobanoglu'
'''
 CCA is Canonical Correlation Analysis
'''

print(__doc__)

from sklearn.cross_decomposition import CCA
from sklearn import datasets

X = [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [3., 5., 4.]]
Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]

cca = CCA(n_components=1)
cca.fit(X, Y)

# repr echoed from the interactive docs example:
# CCA(copy=True, max_iter=500, n_components=1, scale=True, tol=1e-06)

X_c, Y_c = cca.transform(X, Y)
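
# A quick check of what the shared projection achieved: the first canonical
# pair should be highly correlated (an added sketch; the numpy import is an
# assumption, not part of the original)
import numpy as np
print(np.corrcoef(X_c.ravel(), Y_c.ravel())[0, 1])   # close to 1.0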
Example #20
class CCA_Model:
    def __init__(self,n_components):
        self.n_components = n_components
        self.cca = CCA(n_components=n_components)
        self.ntop  = 10


    def learn_model(self,X_chanel, Y_chanel,Y_Distinct=None):
        """

        :param X_chanel: array-like for X chanel
        :param Y_chanel: array-like for Y chanel
        :return:

        """
        print "Start learning..."

        self.x_dim  = len(X_chanel[0])
        self.y_dim = len(Y_chanel[0])
        self.cca.fit(X_chanel,Y_chanel)
        if Y_Distinct is None:
            self.X_transform ,self.Y_transform = self.cca.transform(X_chanel,Y_chanel)
        else:
            self.X_transform ,self.Y_transform = self.cca.transform(X_chanel,Y_Distinct)

        print "Learning completed"


    def get_bet_match_index_transform_x2y(self,x_transform):
        shape = self.Y_transform.shape
        scores = np.ndarray(shape[0],dtype=float)
        for i in xrange(shape[0]):
            scores[i] = np.dot(self.Y_transform[i],x_transform)
            #scores[i] = entropy(x_transform,self.Y_transform[i])

        indices = (-scores).argsort()[:self.ntop]
        return [indices, scores[indices]]


    def get_bet_match_index_transform_y2x(self,y_transform):
        shape = self.X_transform.shape
        scores = np.ndarray(shape[0], dtype=float)
        for i in xrange(shape[0]):
            scores[i] = np.dot(self.X_transform[i], y_transform)
            #scores[i] = entropy(y_transform,self.X_transform[i])
        indices = (-scores).argsort()[:self.ntop]

        return [indices, scores[indices]]

    def get_best_match_cross_indices_x2y(self,x_inputs):
        x_transformes = self.cca.transform(x_inputs)
        results = []
        for x_transform in x_transformes:
            results.append(self.get_bet_match_index_transform_x2y(x_transform))
        return results

    def get_best_match_cross_indices_y2x(self,y_inputs):
        _, y_transformes = self.cca.transform([[0 for i in xrange(self.x_dim)]],y_inputs)
        results = []
        for y_transform in y_transformes:
            results.append(self.get_bet_match_index_transform_y2x(y_transform))
        return results
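
A minimal usage sketch for CCA_Model with random data (dimensions are illustrative; assumes numpy is imported as np, as the class body requires):

if __name__ == '__main__':
    X = np.random.randn(200, 20)
    Y = np.random.randn(200, 15)
    model = CCA_Model(n_components=5)
    model.learn_model(X, Y)
    # top-10 Y rows whose canonical projection best matches the first X row
    indices, scores = model.get_best_match_cross_indices_x2y(X[:1])[0]
    print indices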
def mainExec(name_file, features):
    '''
    Based on a list of image names and image features, learn a CCA model based on Stacked Auxiliary Embedding and
    save this model to disk.
    :param name_file
    :param features
    :return:
    '''
    print "Creating vocabulary"
    voc = readVocabulary()
    print "Generating document vectors"
    occurrenceVectors, idf = createOccurrenceVectors(voc)
    print "Weighing vectors"
    weightedVectors = weight_tfidf(occurrenceVectors, idf)

    sentenceMatrix = []
    imagematrix = []
    print "Creating matrices"
    currentSentence = 0
    for i in weightedVectors.keys():
        if isLargeEnough(i):
            currentSentence += 1
            print "current Sentence: " + str(currentSentence)
            for j in range(len(weightedVectors[i])):
                weightedVectors[i][j] = float(weightedVectors[i][j])
            if currentSentence == 1:
                sentenceMatrix = weightedVectors[i]
                imagematrix = getImage(i,name_file, features)
            elif currentSentence ==2:
                sentenceMatrix = np.concatenate(([sentenceMatrix], [weightedVectors[i]]), axis = 0)
                imagematrix = np.concatenate(([imagematrix], [getImage(i,name_file, features)]), axis = 0)
            else:
                sentenceMatrix = np.concatenate((sentenceMatrix, [weightedVectors[i]]), axis = 0)
                imagematrix = np.concatenate((imagematrix, [getImage(i,name_file, features)]), axis = 0)

    print "Modelling cca"
    cca = CCA(n_components=128)
    cca.fit(sentenceMatrix, imagematrix)
    pickle.dump(cca, open("ccasnippetmodel.p",'w+'))

    idf = np.zeros(len(voc))
    trainingimages = []
    trainingsentences = []
    dp = getDataProvider('flickr30k')
    currentPair = 0
    for pair in dp.sampleImageSentencePair():
        currentPair += 1
        if currentPair % 100 == 0:
            print "Current pair: " + str(currentPair)
        img = pair['image']['feat']
        trainingimages.append(img)
        sentence = getFullSentence(pair)
        for i in range(len(sentence)):
            if sentence[i] > 0:
                idf[i] += 1
        trainingsentences.append(sentence)
    for i in range(len(trainingsentences)):
        trainingsentences[i] = trainingsentences[i]*idf

    trans_img, trans_sent = cca.transform(trainingimages, trainingsentences)
    nn_img = nearest_neighbor(trainingimages)
    nn_sent = nearest_neighbor(trainingsentences)

    augmented_imgs = []
    augmented_sentences = []
    for i in range(len(trans_img)):
        # list.extend returns None, so build the augmented vector by concatenation
        augm_img = list(trainingimages[i]) + list(phi(3000, nn_img, trans_img[i]))
        augmented_imgs.append(augm_img)

    for i in range(len(trans_sent)):
        augm_sent = list(trainingsentences[i]) + list(phi(3000, nn_sent, trans_sent[i]))
        augmented_sentences.append(augm_sent)

    augmentedcca = CCA(n_components= 96)
    augmentedcca.fit(augmented_sentences, augmented_imgs)

    pickle.dump(cca, open("augmentedcca.p",'w+'))
Example #22
 def __init__(self,n_components):
     self.n_components = n_components
     self.cca = CCA(n_components=n_components)
     self.ntop  = 10
Example #23
[ 156, 33, 54, 15, 225,  73],
[ 138, 33, 68,  2, 110,  43]
]

print X.shape

#X = N.array(Z)[:,0:3].tolist()
#Y = N.array(Z)[:,3:6].tolist()
print 'X=\n',X
print 'Y=\n',Y


Rx = N.corrcoef(X.T)
Ry = N.corrcoef(Y.T)

cca = CCA(n_components=1)
cca.fit(X, Y)

print "Rx:\n", Rx
print "Ry:\n", Ry
print "x_weights:\n", cca.x_weights_
print "y_weights:\n", cca.y_weights_
print "x_loadings:\n", cca.x_loadings_
print "y_loadings:\n", cca.y_loadings_
print "x_scores_:\n", cca.x_scores_
print "y_scores_:\n", cca.y_scores_

loadings_man_x = N.dot(Rx, cca.x_weights_)
loadings_man_y = N.dot(Ry, cca.y_weights_)
print "loadings_man_x:\n",loadings_man_x
print "loadings_man_y:\n",loadings_man_y
Example #24
# each Yj = 1*X1 + 2*X2 + noise
Y = np.dot(X, B) + np.random.normal(size=n * q).reshape((n, q)) + 5

pls2 = PLSRegression(n_components=3)
pls2.fit(X, Y)
print("True B (such that: Y = XB + Err)")
print(B)
# compare pls2.coef_ with B
print("Estimated B")
print(np.round(pls2.coef_, 1))
pls2.predict(X)

# PLS regression, with univariate response, a.k.a. PLS1

n = 1000
p = 10
X = np.random.normal(size=n * p).reshape((n, p))
y = X[:, 0] + 2 * X[:, 1] + np.random.normal(size=n * 1) + 5
pls1 = PLSRegression(n_components=3)
pls1.fit(X, y)
# note that the number of components exceeds 1 (the dimension of y)
print("Estimated betas")
print(np.round(pls1.coef_, 1))

# #############################################################################
# CCA (PLS mode B with symmetric deflation)

cca = CCA(n_components=2)
cca.fit(X_train, Y_train)
X_train_r, Y_train_r = cca.transform(X_train, Y_train)
X_test_r, Y_test_r = cca.transform(X_test, Y_test)