def read_training_data():
    """Load the training set stored at ``inner_product/train.data``.

    Returns whatever ``cancer_data.read_training_data`` produces for that
    path, unchanged.
    """
    training_path = 'inner_product/train.data'
    return cancer_data.read_training_data(training_path)
Пример #2
0
        v.D = features | set(feature_vectors.keys()) | {'gamma'}
    #print(main[919555])
    #print(next(iter(main.values())))
    # print(main[-919555])
    return matutil.rowdict2mat(main)


def make_b(ids):
    """Build a Vec over *ids* holding 1 for positive labels and 0 otherwise."""
    entries = {}
    for label in ids:
        if label > 0:
            entries[label] = 1
        else:
            entries[label] = 0
    return Vec(ids, entries)


def make_c(features, ids):
    """Build a Vec that is 1 on every label in *ids*.

    The domain is the union of *features*, *ids* and the extra label
    ``'gamma'``; only the *ids* entries are set explicitly.
    """
    domain = features | ids | {'gamma'}
    ones_on_ids = dict.fromkeys(ids, 1)
    return Vec(domain, ones_on_ids)


# Load the training data. F and d are whatever read_training_data returns
# (presumably a feature matrix and a diagnosis vector -- TODO confirm).
F, d = read_training_data('train.data')
# print(A.D[0])
# Only these two features are passed on to make_matrix / make_c.
features = {'texture(worst)', 'area(worst)'}
A = make_matrix(matutil.mat2rowdict(F), d, features)
b = make_b(A.D[0])
c = make_c(features, F.D[0])
# Seed R_square with a copy of F.D[0], then pad it with negated elements of
# F.D[0] until it holds as many labels as A.D[1] does.
R_square = F.D[0].copy()
n = len(A.D[1])
it = iter(F.D[0])
while len(R_square) < n:
    # NOTE(review): next(it) raises StopIteration if F.D[0] runs out before
    # R_square reaches n elements -- confirm that cannot happen.
    R_square.add(-next(it))

print(A.D[1])
# R_square is printed again after this call, so find_vertex presumably
# updates it in place -- TODO confirm.
find_vertex(A, b, R_square)
print(R_square)
print('vertex found')
Пример #3
0
def test_classifier(fname, w, gamma):
    """Count how many rows of the data in *fname* the classifier gets right.

    Args:
        fname: Data file name handed to ``read_training_data``.
        w: Forwarded unchanged as the second argument of ``C``.
        gamma: Forwarded unchanged as the third argument of ``C``.

    Returns:
        The number of rows whose ``C(features, w, gamma)`` result equals the
        diagnosis read from *fname* for the same row label.
    """
    # Bind the diagnoses that belong to *fname*. The previous code stored
    # them under a different name and then read ``diagnoses``, so it never
    # compared against the labels it had just loaded.
    patient_features, diagnoses = read_training_data(fname, FEATUREs)
    # mat2rowdict is expected to return a {row_label: row} mapping; iterating
    # a mapping yields keys only, so .items() is required to unpack pairs.
    rows = mat2rowdict(patient_features)
    return sum(
        1
        for lbl, features in rows.items()
        if C(features, w, gamma) == diagnoses[lbl]
    )
Пример #4
0
from cancer_data import read_training_data
from matutil import rowdict2mat, mat2rowdict
import simplex
from vec import Vec
from mat import Mat

# Feature names requested from the training data.
FEATUREs = {"area(worst)", "smoothness(worst)", "texture(mean)"}

# --- Read training data ---
print("== Read Training Data ==")
patient_features, diagnoses = read_training_data("train.data", FEATUREs)
IDs = patient_features.D[0]

# --- Labels of matrix A ---
# Columns: every feature name, every ID, plus the extra "gamma" label.
A_COLS = FEATUREs.union(IDs, {"gamma"})

# Rows: every ID together with its negation.
A_ROWS = IDs.union(-i for i in IDs)

### set A content
## Task 13.13.1
def main_constraint(i, pf_i, d_i, features):
    """Build the constraint row associated with label *i*.

    The row is a Vec over ``A_COLS``: each name ``f`` in *features* holds
    ``d_i * pf_i[f]``, the ``"gamma"`` entry holds ``-1 * d_i`` and the
    entry for *i* holds 1.
    """
    scaled = {}
    for name in features:
        scaled[name] = d_i * pf_i[name]

    row = Vec(A_COLS, scaled)
    row["gamma"] = -1 * d_i
    row[i] = 1
    return row


## Task 13.13.2
def make_matrix(feature_vectors, diagnoses, features):
Пример #5
0
from VectorClass import Vec
from cancer_data import read_training_data
from matutil import mat2rowdict, listlist2mat
from vectorTasks import zero_vec, list2vec
from orthogonality import QR_solve

# Data read from 'train.data'.
A, b = read_training_data('train.data')
# Data read from 'validate.data'.
C, d = read_training_data('validate.data')
# Starting vector: an explicit 0 for every label in A.D[1].
w = Vec(A.D[1], dict.fromkeys(A.D[1], 0))


def signum(u):
    """Return the sign vector of *u*.

    Args:
        u: A Vec-like object exposing its domain as ``u.D`` and supporting
            ``u[key]`` lookups.

    Returns:
        A Vec over ``u.D`` whose entry is 1 where ``u[key] >= 0`` and -1
        otherwise.
    """
    # Walk the whole domain rather than only the stored entries (u.f), so
    # that entries not stored explicitly are classified too instead of
    # being left unset in the result.
    # NOTE(review): assumes u[key] yields a value for every key in u.D,
    # including keys absent from u.f -- confirm against the Vec class.
    return Vec(u.D, {key: 1 if u[key] >= 0 else -1 for key in u.D})


def fraction_wrong(A, b, w):
    """Return the fraction of rows of *A* that *w* classifies incorrectly.

    Input: an R x C matrix A whose rows are feature vectors, an R-vector b
    whose entries are +1 and -1, and a C-vector w.

    Output: the fraction of row labels r of A such that the sign of
    (row r of A) * w differs from b[r].
    """
    predicted = signum(A * w)
    mismatches = sum(1 for label in predicted.D if predicted[label] != b[label])
    return mismatches / len(A.D[0])


def loss(A, b, w):
    """Return the value L(w) of the loss function for hypothesis *w*.

    Args:
        A: The training data matrix.
        b: The vector of target values.
        w: The hypothesis vector.

    Returns:
        ``(A * w - b) * (A * w - b)``, i.e. the residual multiplied by
        itself.
    """
    # Compute the residual once and return the result; the previous version
    # only printed it, so callers that use the return value received None.
    residual = (A * w) - b
    return residual * residual


def find_grad(A, b, w):
Пример #6
0
    for i in range(T):
        w = gradient_descent_step(A, b, w, sigma)
        # if i % 30 == 0:
        # print_statistics(A, b, w)
    # elapsed_time = time.time() - start_time
    #print("Elapsed time running ", T, " iterations: ", elapsed_time)
    return w


def print_statistics(A, b, w):
    """Print the result of ``loss`` and of ``fraction_wrong`` for *w*."""
    current_loss = loss(A, b, w)
    print("Loss function: ", current_loss)
    error_rate = fraction_wrong(A, b, w)
    print("Percent wrong: ", error_rate)


if __name__ == '__main__':
    A_train, b_train = read_training_data('train.data')
    w_0 = Vec(A_train.D[1], {col: 0 for col in A_train.D[1]})
    w_1 = Vec(A_train.D[1], {col: 1 for col in A_train.D[1]})
    step_a = 10**(-9)
    step_b = 2 * 10**(-9)

    hyp_0a = gradient_descent(A_train, b_train, w_0, step_a, 1000)
    print("Statistics for initial weights 0, step size 10^-9:")
    print_statistics(A_train, b_train, hyp_0a)

    hyp_1a = gradient_descent(A_train, b_train, w_1, step_a, 1000)
    print("Statistics for initial weights 1, step size 10^-9:")
    print_statistics(A_train, b_train, hyp_0a)

    hyp_0b = gradient_descent(A_train, b_train, w_0, step_b, 1000)
    print("Statistics for initial weights 0, step size 2*10^-9:")