# use regression
    # Fit the regression on the training features. X_v and X_a are whatever
    # regression() returns for the valence and arousal targets respectively.
    X_v, X_a = regression(train_feat, val_mean, aro_mean)

    # Use the regression output to calculate valence and arousal for the
    # test features: element-wise product with X_v / X_a, summed along axis 1.
    # test_feat is wrapped in np.array on both lines so the two predictions
    # are computed the same way.
    all_val = np.sum(np.array(test_feat) * X_v, axis=1)
    all_aro = np.sum(np.array(test_feat) * X_a, axis=1)

    print("ATTEMPT" + str(i))
    avg = average_distance_va(all_val, all_aro, valence, arousal, all_ids)
    nearest = nearest_dist_average_va(all_val, all_aro, valence, arousal,
                                      all_ids)
    standdev = no_stdev_average_va(all_val, all_aro, val_mean, aro_mean,
                                   valence, arousal, all_ids)

    # Not read again in the visible part of this loop body; kept because the
    # calls (and the names) may be relied on elsewhere in the file.
    valence_dist = valence_distance_va(all_val, all_aro, valence, arousal,
                                       all_ids)
    arousal_dist = arousal_distance_va(all_val, all_aro, valence, arousal,
                                       all_ids)

    print('Average distance: ' + str(avg))
    print('Nearest distance: ' + str(nearest))
    # standdev comes from no_stdev_average_va, so it gets its own label
    # instead of repeating 'Nearest distance: '.
    print('Std distance: ' + str(standdev))

    # The trees are only fitted when the avg computed earlier in this loop
    # body is already below the best average distance seen so far.
    if avg < best_avg:
        # One regressor per target, both limited to depth j.
        clf_1 = DecisionTreeRegressor(max_depth=j)
        clf_1.fit(Xtrain, Yvtrain)
        clf_2 = DecisionTreeRegressor(max_depth=j)
        clf_2.fit(Xtrain, Yatrain)

        # Predict both targets for the test split and score the predictions.
        Yvpred, Yapred = clf_1.predict(Xtest), clf_2.predict(Xtest)
        avg = average_distance_va(Yvpred, Yapred, valence, arousal, idstest)

        # Keep the smaller of the tree score and the current best.
        best_avg = avg if avg < best_avg else best_avg

    print('j ' + str(j))
    print('Average distance: ' + str(best_avg))
    # print 'Nearest distance: ' + str(best_near)
    # print 'Std distance: ' + str(best_std)

    # print best_val
    # print best_aro
# Final summary: the lowest average distance recorded in best_avg.
print('BEST')
print('Average distance: ' + str(best_avg))
	# use regression
    # Fit the regression on the training features. X_v and X_a are whatever
    # regression() returns for the valence and arousal targets respectively.
    X_v, X_a = regression(train_feat, val_mean, aro_mean)

    # Use the regression output to calculate valence and arousal for the
    # test features: element-wise product with X_v / X_a, summed along axis 1.
    # test_feat is wrapped in np.array on both lines so the two predictions
    # are computed the same way.
    all_val = np.sum(np.array(test_feat) * X_v, axis=1)
    all_aro = np.sum(np.array(test_feat) * X_a, axis=1)

    print("ATTEMPT" + str(i))
    avg = average_distance_va(all_val, all_aro, valence, arousal, all_ids)
    nearest = nearest_dist_average_va(all_val, all_aro, valence, arousal, all_ids)
    standdev = no_stdev_average_va(all_val, all_aro, val_mean, aro_mean, valence, arousal, all_ids)

    # Not read again in the visible part of this loop body; kept because the
    # calls (and the names) may be relied on elsewhere in the file.
    valence_dist = valence_distance_va(all_val, all_aro, valence, arousal, all_ids)
    arousal_dist = arousal_distance_va(all_val, all_aro, valence, arousal, all_ids)

    print('Average distance: ' + str(avg))
    print('Nearest distance: ' + str(nearest))
    # standdev comes from no_stdev_average_va, so it gets its own label
    # instead of repeating 'Nearest distance: '.
    print('Std distance: ' + str(standdev))

    # Record the regression scores when they beat the best average so far.
    if avg < best_avg:
        best_avg, best_near, best_std = avg, nearest, standdev

    # Debug output: marker plus the sizes of X, Yv and Ya.
    print("check 5")
    print(X.shape)
    print(len(Yv))
    print(len(Ya))

    # One depth-2 decision tree per target, fitted on the training split.
    clf_1 = DecisionTreeRegressor(max_depth=2)
    clf_1.fit(Xtrain, Yvtrain)
    clf_2 = DecisionTreeRegressor(max_depth=2)
    clf_2.fit(Xtrain, Yatrain)

    # Predict both targets for the test split and score the predictions.
    Yvpred, Yapred = clf_1.predict(Xtest), clf_2.predict(Xtest)
    avg = average_distance_va(Yvpred, Yapred, valence, arousal, idstest)

    # NOTE(review): only best_avg is updated here; best_near and best_std
    # keep the values from the regression branch above -- confirm intended.
    best_avg = avg if avg < best_avg else best_avg

# Final summary: the lowest average distance recorded in best_avg.
print("BEST")
print('Average distance: ' + str(best_avg))
# print 'Nearest distance: ' + str(best_near)
# print 'Std distance: ' + str(best_std)

# print best_val
# print best_aro
# Read the ids and their features from the 'features/spectrum' source.
ids, feat = read_fake_chroma('features/spectrum')

# NOTE: train_ids is the same list object as ids, so the shuffle below
# reorders ids in place as well.
train_ids = ids
random.shuffle(train_ids)

# The first 140 shuffled ids are used for training, all remaining ids for
# evaluation. Both slices share the boundary 140 so that no id is lost
# (slicing with [141:] and [0:140] left the id at index 140 in neither set).
train_ids, all_ids = train_ids[:140], train_ids[140:]


# Calculate the valence and arousal means for the training ids.
val_mean, aro_mean = find_a_v_mens_va(train_ids, valence, arousal)
train_feat = find_in_dict(feat, train_ids)
test_feat = find_in_dict(feat, all_ids)

# Fit the regression on the training features.
X_v, X_a = regression(train_feat, val_mean, aro_mean)

# Use the regression output to calculate valence and arousal for the
# evaluation features: element-wise product with X_v / X_a, summed along
# axis 1. test_feat is wrapped in np.array on both lines for consistency.
all_val = np.sum(np.array(test_feat) * X_v, axis=1)
all_aro = np.sum(np.array(test_feat) * X_a, axis=1)

# Optional plot of the predictions, disabled by default.
#plot_all_va(all_val, all_aro, all_ids, valence, arousal)

print('Average distance: ' + str(average_distance_va(all_val, all_aro, valence, arousal, all_ids)))
print('Nearest distance: ' + str(nearest_dist_average_va(all_val, all_aro, valence, arousal, all_ids)))
print('Std distance: ' + str(no_stdev_average_va(all_val, all_aro, val_mean, aro_mean, valence, arousal, all_ids)))