def testNormalizeLike(self): a = np.empty((10, 3)) a[:, 0] = np.random.random(10) a[:, 1] = np.random.random(10) a[:, 2] = np.random.random(10) b = np.empty((10, 3)) b[:, 0] = np.random.random(10) b[:, 1] = np.random.random(10) b[:, 2] = np.random.random(10) b = b * 2 c = normalizeArrayLike(b, a) # Should be normalized like a mean = [] std = [] mean.append(np.mean(a[:, 0])) mean.append(np.mean(a[:, 1])) mean.append(np.mean(a[:, 2])) std.append(np.std(a[:, 0])) std.append(np.std(a[:, 1])) std.append(np.std(a[:, 2])) # Check all values for col in xrange(b.shape[1]): for bval, cval in zip(b[:, col].flat, c[:, col].flat): print cval, (bval - mean[col]) / std[col] print cval, bval assert cval == (bval - mean[col]) / std[col] print ("TestNormalizeLike success")
def loadData():
    """Load the breast-cancer test set and return (inputs, targets), with
    the test inputs standardized column-wise like the training set."""
    trnfile = ('/home/gibson/jonask/DataSets/breast_cancer_1/' +
               'n4369_trainingtwothirds.csv')
    testfile = ('/home/gibson/jonask/DataSets/breast_cancer_1/' +
                'n4369_targetthird.csv')
    columns = ['age', 'log(1+lymfmet)', 'n_pos', 'tumsize',
               'log(1+er_cyt)', 'log(1+pgr_cyt)', 'pgr_cyt_pos',
               'er_cyt_pos', 'size_gt_20', 'er_cyt', 'pgr_cyt']
    targets = ['time_10y', 'event_10y']

    # Normalize the test data as we normalized the training data:
    # the raw (un-normalized) training inputs serve as the template.
    template_inputs, _ignored = parse_file(trnfile, inputcols=columns,
                                           targetcols=targets,
                                           normalize=False, separator=',',
                                           use_header=True)
    raw_test_inputs, test_targets = parse_file(testfile, inputcols=columns,
                                               targetcols=targets,
                                               normalize=False,
                                               separator=',',
                                               use_header=True)
    test_data = normalizeArrayLike(raw_test_inputs, template_inputs)

    #If you want to return train data instead
    trn_data, trn_targets = parse_file(trnfile, inputcols=columns,
                                       targetcols=targets, normalize=True,
                                       separator=',', use_header=True)
    #return trn_data, trn_targets

    return test_data, test_targets
def loadData():
    """Return (test_inputs, test_targets) for the breast-cancer data,
    normalizing the test inputs with the training set's column statistics."""
    trnfile = ('/home/gibson/jonask/DataSets/breast_cancer_1/' +
               'n4369_trainingtwothirds.csv')
    testfile = ('/home/gibson/jonask/DataSets/breast_cancer_1/' +
                'n4369_targetthird.csv')
    columns = ['age', 'log(1+lymfmet)', 'n_pos', 'tumsize',
               'log(1+er_cyt)', 'log(1+pgr_cyt)', 'pgr_cyt_pos',
               'er_cyt_pos', 'size_gt_20', 'er_cyt', 'pgr_cyt']
    targets = ['time_10y', 'event_10y']
    # Keyword arguments shared by every parse_file call below.
    common = dict(inputcols=columns, targetcols=targets, separator=',',
                  use_header=True)

    # Normalize the test data as we normalized the training data
    norm_template, _ = parse_file(trnfile, normalize=False, **common)
    unnormed_test, test_targets = parse_file(testfile, normalize=False,
                                             **common)
    test_data = normalizeArrayLike(unnormed_test, norm_template)

    #If you want to return train data instead
    trn_data, trn_targets = parse_file(trnfile, normalize=True, **common)
    #return trn_data, trn_targets

    return test_data, test_targets
# Script section: load a pickled ANN model plus normalized test data, then
# rank the covariates by importance via main().

# Training-set CSV; its column statistics define the normalization below.
trnfile = '/home/gibson/jonask/DataSets/breast_cancer_1/n4369_trainingtwothirds.csv'
# Input covariates and survival targets expected in the CSV header.
columns = ['age', 'log(1+lymfmet)', 'n_pos', 'tumsize', 'log(1+er_cyt)',
           'log(1+pgr_cyt)', 'pgr_cyt_pos', 'er_cyt_pos', 'size_gt_20',
           'er_cyt', 'pgr_cyt']
targets = ['time_10y', 'event_10y']

# Normalize the test data as we normalized the training data
normP, bah = parse_file(trnfile, inputcols = columns, targetcols = targets,
                        normalize = False, separator = ',', use_header = True)
# NOTE(review): `testfile` is read here but never assigned in this chunk —
# presumably defined earlier in the file; verify.
unNormedTestP, test_targets = parse_file(testfile, inputcols = columns,
                                         targetcols = targets,
                                         normalize = False, separator = ',',
                                         use_header = True)
test_data = normalizeArrayLike(unNormedTestP, normP)

# Read the model from file
savefile = '/home/gibson/jonask/Dropbox/Ann-Survival-Phd/publication_data/ann/cens_10y/2_tanh_1328829716.pcom'
# NOTE(review): pickle data should normally be read in binary mode ('rb');
# text mode ('r') can corrupt binary pickles on some platforms — confirm.
with open(savefile, 'r') as FILE:
    model = pickle.load(FILE)

# Get a proper header map
column_map = parse_headers_in_file(columns, testfile)

# Explore variable changes
variable_changes = main(model, test_data, test_targets, column_map)

#Print results, sort by change
print("\nCovariates, sorted by importance:")
# NOTE(review): this chunk begins mid-statement — the keyword arguments on
# the next line close a parse_file(...) call whose opening is outside this
# view; it also ends mid-call. Tokens left untouched.
inputcols=columns, targetcols=targets, normalize=False, separator=',',
use_header=True)
print("Retrieving test data...")
unNormedTestP, T = parse_file(testdata, inputcols=columns,
                              targetcols=targets, normalize=False,
                              separator=',', use_header=True)
print("Normalizing test data...")
# Standardize the test inputs using the training inputs as the template.
P = normalizeArrayLike(unNormedTestP, normP)

#Scatter training data
model_output_file = test_model(model, trainingdata, targets[0], targets[1],
                               ',', time_step_size=2, *columns)
scatterplot_files(model_output_file, 0, 2, model_output_file, 1)

#Scatter test data
# NOTE(review): call continues past the end of this chunk.
model_output_file = test_model_arrays(model, testdata, P,
if __name__ == "__main__":
    # Evaluate a previously trained model on the held-out test set.
    model = '/home/gibson/jonask/Dropbox/Ann-Survival-Phd/publication_data/ann/cens_10y/2_tanh_1328829716.pcom'
    testdata = '/home/gibson/jonask/DataSets/breast_cancer_1/n4369_targetthird.csv'
    columns = ['age', 'log(1+lymfmet)', 'n_pos', 'tumsize',
               'log(1+er_cyt)', 'log(1+pgr_cyt)', 'pgr_cyt_pos',
               'er_cyt_pos', 'size_gt_20', 'er_cyt', 'pgr_cyt']
    #targets = ['time_10y', 'event_10y']
    trainingdata = '/home/gibson/jonask/DataSets/breast_cancer_1/n4369_trainingtwothirds.csv'

    print("Retrieving training data...")
    # Normalize the test data the same way the training data was normalized:
    # the raw training inputs act as the normalization template.
    template, _unused = parse_file(trainingdata, inputcols=columns,
                                   normalize=False, separator=',',
                                   use_header=True)

    print("Retrieving test data...")
    raw_test, _raw_targets = parse_file(testdata, inputcols=columns,
                                        normalize=False, separator=',',
                                        use_header=True)

    print("Normalizing test data...")
    test_inputs = normalizeArrayLike(raw_test, template)

    print("Getting outputs for test data...")
    # Without targets, we only get the outputs.
    outputs = test_model_arrays(model, testdata, test_inputs, None)
    print("We have outputs! Length: {}".format(len(outputs)))

    #model_output_file = test_model(model, testdata, None, None, *columns)
    #scatterplot_files(model_output_file, 0, 2, model_output_file, 1)