Exemplo n.º 1
0
def FM_plot(model, metric_top, metric):
    '''Compare two sliced distance matrices through ``fms_compare``.

    model: list of strings, sublist of model_dict
    metric_top: string in feat_dict (any "top_" substring is stripped
        when building the plot label and the file name)
    metric: string in metrics_dict

    The current matplotlib figure is cleared once the comparison is done.
    '''
    dist_top = matrix_slicing(model, metric_top)
    dist_metric = matrix_slicing(model, metric)
    # Length of the first row of the sliced matrix = number of graphs.
    graph_count = len(dist_top[0])

    feature = metric_top.replace("top_", "")
    label = feature + ' vs ' + metric
    # <feature>_<metric>_<model 1>_<model 2>_..._ (trailing underscore kept).
    out_name = feature + '_' + metric + '_' + ''.join(
        name + '_' for name in model)

    fms_compare(sqf(dist_top), sqf(dist_metric), graph_count, label, out_name)
    plt.clf()
Exemplo n.º 2
0
def binary_part_b(train, test):
    """Train an RBF-kernel SVM on classes 2 vs 3 and print the test accuracy.

    The soft-margin dual is solved with cvxopt's QP solver, using the
    Gaussian kernel ``exp(-0.05 * ||u - v||^2)`` and the box constraint
    ``0 <= alpha <= 1``.

    train, test: DataFrames whose columns 0..783 hold the features and whose
        column 784 holds the class label; only rows labelled 2 or 3 are
        used (2 is mapped to -1, 3 to +1).

    Prints the accuracy on the filtered test rows, in percent. Returns None.
    """
    gamma = 0.05
    sv_threshold = .0001

    def _prepare(frame):
        # Keep the two classes, split features/labels, divide features by 255.
        frame = frame.loc[(frame[784] == 2) | (frame[784] == 3)]
        frame = frame.reset_index(drop=True)
        # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
        features = frame.iloc[:, 0:784].values / 255
        # Copy so the relabelling below never writes through to the frame.
        labels = frame.iloc[:, 784].values.copy()
        labels[labels == 2] = -1
        labels[labels == 3] = 1
        return features, labels

    train_X, train_Y = _prepare(train)
    test_X, test_Y = _prepare(test)

    # Raw Gaussian kernel matrix on the training rows.
    kernel = np.exp(-gamma * sqf(pdist(train_X, 'sqeuclidean')))
    # Quadratic term of the dual: Q_ij = y_i * y_j * k(x_i, x_j).
    Q = np.outer(train_Y, train_Y) * kernel

    n_train = train_X.shape[0]
    P = cvx_matrix(Q)
    q = cvx_matrix(-1 * np.ones((n_train, 1)))
    # Stacked inequality constraints: -alpha <= 0 and alpha <= 1.
    G = cvx_matrix(np.vstack((np.diag(-1 * np.ones(n_train)),
                              np.identity(n_train))))
    h = cvx_matrix(np.hstack((np.zeros(n_train), np.ones(n_train))))
    # Equality constraint: sum_i alpha_i * y_i = 0.
    A = cvx_matrix(train_Y.reshape(1, -1).astype(np.double))
    b_eq = cvx_matrix(np.zeros(1))
    result = solvers.qp(P, q, G, h, A, b_eq)

    # Support vectors are the points whose multiplier exceeds the threshold.
    multipliers = np.array(result['x']).flatten()
    is_sv = multipliers > sv_threshold
    alpha = multipliers[is_sv]
    xi = train_X[is_sv]
    yi = train_Y[is_sv]
    coef = alpha * yi

    # Bias: mean over support vectors of y_i - sum_j alpha_j y_j k(x_i, x_j).
    # The raw kernel must be used here; multiplying the label-weighted Q by
    # the labels again would give y_i * sum_j alpha_j k(x_i, x_j) instead.
    bias = np.mean(yi - kernel[np.ix_(is_sv, is_sv)] @ coef)

    n_test = test_X.shape[0]
    decision = np.empty(n_test)
    for row, point in enumerate(test_X):
        diff = xi - point
        sq_dist = np.einsum('ij,ij->i', diff, diff)
        decision[row] = coef @ np.exp(-gamma * sq_dist)
    decision += bias

    pred_y = np.where(decision >= 0, 1, -1)
    accuracy = (np.sum(pred_y == test_Y)) / n_test * 100
    print(accuracy)
Exemplo n.º 3
0
def cluster_dendogram(model, metric):
    '''Plot and save a complete-linkage dendrogram of a sliced distance matrix.

    model: list of strings, sublist of model_dict
    metric is a string: 'PD', 'TEuc', 'TEMD', 'random', 'top_log_Betti' or 'top_log_simplex'

    The image is written to path_dendo + title + '.jpg', where title is the
    metric followed by the model names, joined with underscores.
    '''
    # Obtain the sliced distance matrix and cluster its condensed form.
    D = matrix_slicing(model, metric)
    ZXc = hierarchy.linkage(sqf(D), method='complete')
    title = '_'.join([metric] + list(model))

    fig = plt.figure(figsize=(25, 10))
    try:
        plt.title(title)
        plt.xlabel('sample index')
        plt.ylabel(metric + '_distance')
        hierarchy.dendrogram(
            ZXc,
            leaf_rotation=90.,  # rotates the x axis labels
            leaf_font_size=8.,  # font size for the x axis labels
        )
        plt.savefig(path_dendo + title + '.jpg')
    finally:
        # Close (not just clear) the figure: every call creates a new one,
        # so clearing alone would let figures accumulate across calls.
        plt.close(fig)
Exemplo n.º 4
0
# # sdm = sklearn.metrics.pairwise.euclidean_distances( cpdf_2015_2_nm.values )


# dm0 = pd.read_csv(fl , header=0 , sep = ' ')



# dm0 = pd.read_csv(fl , header=0 , sep = ' ')

    
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import squareform as sqf

# Work on a deep copy so later changes to either object do not affect the other.
dmc03_cnt_ind = copy.deepcopy(dmc03)

# Convert with squareform (presumably square matrix -> condensed vector);
# checks=False skips its symmetry / zero-diagonal validation.
Dsqf = sqf(dmc03_cnt_ind, checks=False)

# Complete-linkage hierarchical clustering on the condensed distances.
linkage_matrix = linkage(Dsqf, 'complete')
figure = plt.figure(figsize=(7.5, 5))
dendrogram(
    linkage_matrix,
    color_threshold=0,
    labels=pdf_cnt_ind_mrk2.columns
)
# The title names the linkage method actually used above ('complete');
# it previously said "(Single)".
plt.title('Hierarchical Clustering Dendrogram (Complete)')
plt.xlabel('Currency Symbol')
plt.ylabel('AC measure')
plt.tight_layout()
Exemplo n.º 5
0
def multi_part_a(train, test):
    """Classify with one-vs-one RBF-kernel SVMs over classes 0..4.

    For every pair of classes a soft-margin SVM dual (Gaussian kernel
    ``exp(-0.05 * ||u - v||^2)``, box constraint ``0 <= alpha <= 1``) is
    solved with cvxopt. Each pairwise classifier votes for one of its two
    classes per test row, and the most frequent vote is the prediction.

    train, test: handed unchanged to ``train_data_format`` and
        ``test_data_format`` together with the two class ids of the pair.
        NOTE(review): those helpers are assumed to return ``(X, Y)`` numpy
        arrays with ``Y`` in {-1, +1}, where -1 stands for the first class
        of the pair -- confirm against their definitions.

    Prints each pair's accuracy, a progress line per pair, and the final
    accuracy. Returns the voted class predictions as a numpy array.
    """
    gamma = 0.05
    sv_threshold = .0001
    class_pairs = list(itertools.combinations(range(5), 2))

    votes = []
    for i, (neg_class, pos_class) in enumerate(class_pairs):
        train_X, train_Y = train_data_format(train, neg_class, pos_class)
        test_X, test_Y = test_data_format(test, neg_class, pos_class)

        # Raw Gaussian kernel matrix on the training rows.
        kernel = np.exp(-gamma * sqf(pdist(train_X, 'sqeuclidean')))
        # Quadratic term of the dual: Q_ij = y_i * y_j * k(x_i, x_j).
        Q = np.outer(train_Y, train_Y) * kernel

        n_train = train_X.shape[0]
        P = cvx_matrix(Q)
        q = cvx_matrix(-1 * np.ones((n_train, 1)))
        # Stacked inequality constraints: -alpha <= 0 and alpha <= 1.
        G = cvx_matrix(np.vstack((np.diag(-1 * np.ones(n_train)),
                                  np.identity(n_train))))
        h = cvx_matrix(np.hstack((np.zeros(n_train), np.ones(n_train))))
        # Equality constraint: sum_i alpha_i * y_i = 0.
        A = cvx_matrix(train_Y.reshape(1, -1).astype(np.double))
        b_eq = cvx_matrix(np.zeros(1))
        result = solvers.qp(P, q, G, h, A, b_eq)

        # Support vectors are the points whose multiplier exceeds the threshold.
        multipliers = np.array(result['x']).flatten()
        is_sv = multipliers > sv_threshold
        alpha = multipliers[is_sv]
        xi = train_X[is_sv]
        yi = train_Y[is_sv]
        coef = alpha * yi

        # Bias: mean over support vectors of
        # y_i - sum_j alpha_j y_j k(x_i, x_j). The raw kernel is required;
        # reusing the label-weighted Q and multiplying by the labels again
        # would give y_i * sum_j alpha_j k(x_i, x_j) instead.
        bias = np.mean(yi - kernel[np.ix_(is_sv, is_sv)] @ coef)

        n_test = test_X.shape[0]
        decision = np.empty(n_test)
        for row, point in enumerate(test_X):
            diff = xi - point
            sq_dist = np.einsum('ij,ij->i', diff, diff)
            decision[row] = coef @ np.exp(-gamma * sq_dist)
        decision += bias

        pred_y = np.where(decision >= 0, 1, -1)
        accuracy = (np.sum(pred_y == test_Y)) / n_test * 100
        print(accuracy)

        # Translate the sign into a class id in one step. Remapping in two
        # in-place passes overwrote the first class whenever its id was > 0.
        votes.append(np.where(decision >= 0, pos_class, neg_class))
        print('value of i is:' + str(i))

    # One row per test sample, one column per pairwise classifier.
    vote_table = np.transpose(np.array(votes))
    final_prediction = []
    for sample_votes in vote_table:
        # Majority vote for THIS row; ties resolve to the smallest class id
        # because np.unique returns the labels sorted.
        labels, counts = np.unique(sample_votes, return_counts=True)
        final_prediction.append(labels[np.argmax(counts)])
    final_prediction = np.array(final_prediction)

    # NOTE(review): test_Y and n_test come from the LAST class pair, exactly
    # as before. If test_data_format filters rows or encodes labels as
    # -1/+1, this figure is not the true multi-class accuracy -- confirm and
    # compare against the original test labels instead.
    accuracy = (np.sum(final_prediction == test_Y)) / n_test * 100
    print(accuracy)
    return final_prediction
Exemplo n.º 6
0
# metrics.pairwise.euclidean_distances?

# dm0 = pd.read_csv(fl , header=0 , sep = ' ')

from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import squareform as sqf

# Dsqf = sqf(dm0.values )
# print ( sdm.values - sdm.values.transpose())

# Distance threshold at which the hierarchy is cut into flat clusters.
distance_cut = 0.50
# Linkage criterion handed to scipy's linkage().
method = 'ward'

# Pairwise Euclidean distances between the rows of cpdf_2015_2, converted to
# condensed form; checks=False skips squareform's symmetry / zero-diagonal
# validation.
sdm = metrics.pairwise.euclidean_distances(cpdf_2015_2.values)
Dsqf = sqf(sdm, checks=False)
linkage_matrix = linkage(Dsqf, method)

from scipy.cluster.hierarchy import fcluster
max_d = distance_cut
# Flat cluster id per row, cutting the tree at cophenetic distance max_d.
clusters = fcluster(linkage_matrix, max_d, criterion='distance')
# Bare expression: only shows a value in an interactive / notebook session.
clusters

# Store the assignment in cpdf_o under a column named after the method and
# cut distance, e.g. 'HC_ward_D=0.5'.
cluster_methodology = 'HC_' + method + '_D=' + str(distance_cut)
cpdf_o[cluster_methodology] = clusters

# Row labels of the clustered frame (nothing is printed here).
labels = cpdf_2015_2.index

# Bare expressions again: largest and smallest cluster id, displayed only
# when run interactively.
max(clusters)
min(clusters)
Exemplo n.º 7
0
# # sdm = sklearn.metrics.pairwise.euclidean_distances( cpdf_2015_2_nm.values )


# dm0 = pd.read_csv(fl , header=0 , sep = ' ')



# dm0 = pd.read_csv(fl , header=0 , sep = ' ')

    
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import squareform as sqf

# Work on a deep copy so later changes to either object do not affect the other.
dmc03_cnt_ind = copy.deepcopy(dmc03)

# Convert with squareform (presumably square matrix -> condensed vector);
# checks=False skips its symmetry / zero-diagonal validation.
Dsqf = sqf(dmc03_cnt_ind, checks=False)

# Complete-linkage hierarchical clustering on the condensed distances.
linkage_matrix = linkage(Dsqf, 'complete')
figure = plt.figure(figsize=(7.5, 5))
dendrogram(
    linkage_matrix,
    color_threshold=0,
    labels=pdf_cnt_ind_mrk2.columns
)
# The title names the linkage method actually used above ('complete');
# it previously said "(Single)".
plt.title('Hierarchical Clustering Dendrogram (Complete)')
plt.xlabel('Currency Symbol')
plt.ylabel('AC measure')
plt.tight_layout()