# Exemplo n.º 1 (score: 0)
    def testNormalizeLike(self):
        """normalizeArrayLike(b, a) must z-score ``b`` with ``a``'s column stats.

        Builds two random (10, 3) arrays, normalizes ``b`` using the
        per-column mean/std of ``a``, and checks every value of the result
        equals ``(b - mean_a) / std_a`` column by column.
        """
        a = np.random.random((10, 3))
        b = np.random.random((10, 3)) * 2

        c = normalizeArrayLike(b, a)

        # Column statistics of the reference array, vectorized over axis 0
        # (the original built these lists one column at a time).
        mean = np.mean(a, axis=0)
        std = np.std(a, axis=0)

        # Should be normalized like a.
        expected = (b - mean) / std

        # Compare with a tolerance: the original used exact float ``==``,
        # which is brittle if the implementation accumulates the statistics
        # in a different order and rounds differently.
        assert np.allclose(c, expected), \
            "normalized output does not match (b - mean) / std"
        print("TestNormalizeLike success")
# Exemplo n.º 2 (score: 0)
def loadData():
    """Return ``(test_data, test_targets)`` for the breast-cancer set.

    The test third is parsed un-normalized and then rescaled with the
    column statistics of the (also un-normalized) training two-thirds, so
    both splits share one normalization. Swap the return statements at the
    bottom to hand back the training split instead.
    """
    trnfile = ('/home/gibson/jonask/DataSets/breast_cancer_1/' +
               'n4369_trainingtwothirds.csv')
    testfile = ('/home/gibson/jonask/DataSets/breast_cancer_1/' +
                'n4369_targetthird.csv')
    columns = ['age', 'log(1+lymfmet)', 'n_pos', 'tumsize', 'log(1+er_cyt)',
               'log(1+pgr_cyt)', 'pgr_cyt_pos', 'er_cyt_pos', 'size_gt_20',
               'er_cyt', 'pgr_cyt']
    targets = ['time_10y', 'event_10y']

    # Un-normalized training inputs: these supply the reference statistics.
    reference_inputs, _unused = parse_file(
        trnfile, inputcols=columns, targetcols=targets,
        normalize=False, separator=',', use_header=True)

    raw_test_inputs, test_targets = parse_file(
        testfile, inputcols=columns, targetcols=targets,
        normalize=False, separator=',', use_header=True)

    # Normalize the test data exactly as the training data was normalized.
    test_data = normalizeArrayLike(raw_test_inputs, reference_inputs)

    # Parsed here only so returning the training split is a one-line swap.
    trn_data, trn_targets = parse_file(
        trnfile, inputcols=columns, targetcols=targets,
        normalize=True, separator=',', use_header=True)

    #return trn_data, trn_targets
    return test_data, test_targets
# Exemplo n.º 3 (score: 0)
def loadData():
    """Load the breast-cancer test third, normalized like the training set.

    Returns ``(test_data, test_targets)``; uncomment the alternate return
    at the bottom to get the (normalized) training split instead.
    """
    trnfile = ('/home/gibson/jonask/DataSets/breast_cancer_1/' +
               'n4369_trainingtwothirds.csv')
    testfile = ('/home/gibson/jonask/DataSets/breast_cancer_1/' +
                'n4369_targetthird.csv')
    columns = [
        'age', 'log(1+lymfmet)', 'n_pos', 'tumsize', 'log(1+er_cyt)',
        'log(1+pgr_cyt)', 'pgr_cyt_pos', 'er_cyt_pos', 'size_gt_20', 'er_cyt',
        'pgr_cyt'
    ]
    targets = ['time_10y', 'event_10y']

    # Every parse_file call below shares these settings.
    shared = dict(inputcols=columns, targetcols=targets,
                  separator=',', use_header=True)

    # Normalize the test data as we normalized the training data: parse the
    # training inputs raw so they can act as the normalization reference.
    normP, bah = parse_file(trnfile, normalize=False, **shared)

    unNormedTestP, test_targets = parse_file(testfile, normalize=False,
                                             **shared)

    test_data = normalizeArrayLike(unNormedTestP, normP)

    # If you want to return train data instead.
    trn_data, trn_targets = parse_file(trnfile, normalize=True, **shared)

    #return trn_data, trn_targets
    return test_data, test_targets
    # NOTE(review): everything below is unreachable — it follows the
    # ``return`` above. It appears to be a second, older variant of this
    # routine pasted in by the scraper; kept byte-identical.
    trnfile = '/home/gibson/jonask/DataSets/breast_cancer_1/n4369_trainingtwothirds.csv'
    columns = ['age', 'log(1+lymfmet)', 'n_pos', 'tumsize',
               'log(1+er_cyt)', 'log(1+pgr_cyt)', 'pgr_cyt_pos',
               'er_cyt_pos', 'size_gt_20', 'er_cyt', 'pgr_cyt']
    targets = ['time_10y', 'event_10y']

    # Normalize the test data as we normalized the training data
    normP, bah = parse_file(trnfile, inputcols = columns,
                            targetcols = targets, normalize = False, separator = ',',
                            use_header = True)

    # NOTE(review): ``testfile`` is not defined in this dead branch — another
    # sign this fragment was lifted out of a different context.
    unNormedTestP, test_targets = parse_file(testfile, inputcols = columns,
                                  targetcols = targets, normalize = False,
                                  separator = ',', use_header = True)

    test_data = normalizeArrayLike(unNormedTestP, normP)

    # Read the model from file
    savefile = '/home/gibson/jonask/Dropbox/Ann-Survival-Phd/publication_data/ann/cens_10y/2_tanh_1328829716.pcom'

    # NOTE(review): pickle.load on a file can execute arbitrary code if the
    # file is untrusted — confirm the source of this model file is trusted.
    with open(savefile, 'r') as FILE:
        model = pickle.load(FILE)

    # Get a proper header map
    column_map = parse_headers_in_file(columns, testfile)

    # Explore variable changes
    variable_changes = main(model, test_data, test_targets, column_map)

    #Print results, sort by change
    print("\nCovariates, sorted by importance:")
    # NOTE(review): the call head for the keyword arguments below was lost in
    # the scrape (syntax error from here on); kept as-is rather than guessed at.
                            inputcols=columns,
                            targetcols=targets,
                            normalize=False,
                            separator=',',
                            use_header=True)

    print("Retrieving test data...")
    unNormedTestP, T = parse_file(testdata,
                                  inputcols=columns,
                                  targetcols=targets,
                                  normalize=False,
                                  separator=',',
                                  use_header=True)

    print("Normalizing test data...")
    P = normalizeArrayLike(unNormedTestP, normP)

    #Scatter training data
    model_output_file = test_model(model,
                                   trainingdata,
                                   targets[0],
                                   targets[1],
                                   ',',
                                   time_step_size=2,
                                   *columns)
    scatterplot_files(model_output_file, 0, 2, model_output_file, 1)

    #Scatter test data
    # NOTE(review): this call is truncated mid-argument-list (more scrape
    # damage); its remaining arguments and closing parenthesis are missing.
    model_output_file = test_model_arrays(model,
                                          testdata,
                                          P,
if __name__ == "__main__":
    # Test the model on the test data: score the held-out test third with a
    # previously trained ANN model read from disk.
    model = '/home/gibson/jonask/Dropbox/Ann-Survival-Phd/publication_data/ann/cens_10y/2_tanh_1328829716.pcom'
    testdata = '/home/gibson/jonask/DataSets/breast_cancer_1/n4369_targetthird.csv'
    trainingdata = '/home/gibson/jonask/DataSets/breast_cancer_1/n4369_trainingtwothirds.csv'
    columns = ['age', 'log(1+lymfmet)', 'n_pos', 'tumsize', 'log(1+er_cyt)',
               'log(1+pgr_cyt)', 'pgr_cyt_pos', 'er_cyt_pos', 'size_gt_20',
               'er_cyt', 'pgr_cyt']
    #targets = ['time_10y', 'event_10y']

    print("Retrieving training data...")
    # The un-normalized training inputs supply the normalization statistics
    # so the test data is normalized exactly like the training data.
    reference_inputs, _ = parse_file(trainingdata, inputcols=columns,
                                     normalize=False, separator=',',
                                     use_header=True)

    print("Retrieving test data...")
    raw_test_inputs, _raw_targets = parse_file(testdata, inputcols=columns,
                                               normalize=False, separator=',',
                                               use_header=True)

    print("Normalizing test data...")
    P = normalizeArrayLike(raw_test_inputs, reference_inputs)

    print("Getting outputs for test data...")
    # Without targets, we only get the outputs.
    outputs = test_model_arrays(model, testdata, P, None)
    print("We have outputs! Length: {}".format(len(outputs)))

    #model_output_file = test_model(model, testdata, None, None, *columns)
    #scatterplot_files(model_output_file, 0, 2, model_output_file, 1)