def cross_validation_test():
    """Cross-validate a committee of ANNs on the survival data set.

    Trains a feed-forward committee with the evolutionary algorithm using a
    C-index-based error function, then prints per-network test/validation
    errors and their averages.  Hyper-parameters are fixed except the hidden
    layer size, which may be given as the first command line argument.

    Side effects: reads the hard-coded data file, silences glogger, prints
    results to stdout.
    """
    glogger.setLoggingLevel(glogger.nothing)
    filename = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset.txt"
    columns = (2, -4, -3, -2, -1)
    print('\nIncluding columns: ' + str(columns))
    # Columns 4 and 5 hold survival time and the binary event indicator.
    P, T = parse_file(filename, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
    # Remove tail-censored patients before training.
    P, T = copy_without_tailcensored(P, T)
    comsize = 10
    print('Number of networks to cross-validate: ' + str(comsize))
    if len(sys.argv) < 2:
        netsize = 3
    else:
        # BUG FIX: sys.argv entries are strings — convert to int so the
        # network builder gets a numeric hidden-node count (matches
        # com_cross and the other cross_validation_test variant).
        netsize = int(sys.argv[1])
    print("Number of hidden nodes: " + str(netsize))
    pop_size = 50
    print("Population size: " + str(pop_size))
    mutation_rate = 0.25
    print("Mutation rate: " + str(mutation_rate))
    epochs = 200
    print("Epochs: " + str(epochs))
    com = build_feedforward_committee(comsize, len(P[0]), netsize, 1, output_function = 'linear')
    # 1 is the column in the target array which holds the binary censoring information
    test_errors, vald_errors = train_committee(com, train_evolutionary, P, T, 1, epochs, error_function = c_index_error, population_size = pop_size, mutation_chance = mutation_rate)
    print('\nTest Errors, Validation Errors:')
    for terr, verr in zip(test_errors.values(), vald_errors.values()):
        print(str(terr) + ", " + str(verr))
    print('\nTest average, Validation average:')
    print(str(sum(test_errors.values()) / len(test_errors.values())) + ', ' + str(sum(vald_errors.values()) / len(vald_errors.values())))
def experiment(net, filename, epochs): P, T = parse_file(filename, targetcols = [4], inputcols = [0, 1, 2, 3], ignorecols = [], ignorerows = [], normalize = False) #P = P[:100,:] #T = T[:100, :] try: #net = train_cox(net, (P, T), (None, None), timeslots, epochs = 500, learning_rate = 5) net = traingd(net, (P, T), (None, None), epochs = epochs, learning_rate = 0.01, block_size = 0) #net = train_evolutionary(net, (P, T), (None, None), epochs = epochs) except FloatingPointError: print('Aaawww....') outputs = net.sim(P) plot_network_weights(net) plt.figure() plt.title('Scatter plot sum square error\n' + filename) plt.xlabel('Survival time years') plt.ylabel('Network output') try: plt.scatter(T.flatten(), outputs.flatten(), c = 'g', marker = 's') plt.plot(T.flatten(), T.flatten(), 'r-') except: pass
# Script: evaluate a pre-trained committee on the Pima data set.
# NOTE(review): Python 2 — uses the print statement on the last line.
from os import path
from kalderstam.util.filehandling import parse_file, save_committee, load_committee
from kalderstam.neural.network import build_feedforward_committee, build_feedforward
from kalderstam.util.decorators import benchmark
from kalderstam.neural.training_functions import train_committee, traingd_block, train_evolutionary
import logging
from kalderstam.neural.matlab_functions import plotroc, stat
import matplotlib.pyplot as plt

logging.basicConfig(level=logging.DEBUG)

# load the training set
filename = path.join(path.expanduser("~"), "Kurser/ann_FYTN06/exercise1/pima_trn.dat")
inputs, targets = parse_file(filename, targetcols=8)

# load the test set
filename = path.join(path.expanduser("~"), "Kurser/ann_FYTN06/exercise1/pima_tst.dat")
test_inputs, tst_t = parse_file(filename)

test = (inputs, targets)
validation = ([], [])

# Load a previously trained committee (ROC area encoded in the file name).
com = load_committee("/export/home/jonask/Projects/aNeuralN/ANNs/pimatrain_rocarea84.0328358209.anncom")

# Estimate on test set now
# Y_test = com.sim(test_inputs)
# for value in Y_test:
#     print value[0]

# Evaluate the committee on a single test pattern (index 68).
Y_neg = com.update(test_inputs[68])
print Y_neg
def committee_test():
    """Interactively train and evaluate an ANN committee on survival data.

    Prompts for hyper-parameters, trains a committee with the evolutionary
    algorithm and a C-index error, prints C-indices, draws Kaplan-Meier
    plots, and optionally prints per-patient committee risk output.

    NOTE(review): Python 2 ``input()`` evaluates the typed expression; a bare
    ENTER raises SyntaxError, which is caught to select the default value.
    """
    try:
        netsize = input('Number of hidden nodes? [1]: ')
    except SyntaxError as e:
        netsize = 1
    try:
        comsize = input('Committee size? [1]: ')
    except SyntaxError as e:
        comsize = 1
    try:
        pop_size = input('Population size? [100]: ')
    except SyntaxError as e:
        pop_size = 100
    try:
        mutation_rate = input('Please input a mutation rate (0.05): ')
    except SyntaxError as e:
        mutation_rate = 0.05
    filename = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset.txt"
    try:
        columns = input("Which columns to include? (Do NOT forget trailing comma if only one column is used, e.g. '3,'\nAvailable columns are: 2, -4, -3, -2, -1. Just press ENTER for all columns.\n")
    except SyntaxError:
        columns = (2, -4, -3, -2, -1)
    # Columns 4 and 5 hold survival time and the binary event indicator.
    P, T = parse_file(filename, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
    #remove tail censored
    try:
        cutoff = input('Cutoff for censored data? [9999 years]: ')
    except SyntaxError as e:
        cutoff = 9999
    P, T = copy_without_censored(P, T, cutoff)
    #Divide into validation sets
    try:
        test_size = float(input('Size of test set (not used in training)? Input in fractions. Default is [0.0]: '))
    except:
        test_size = 0.0
    ((TP, TT), (VP, VT)) = get_validation_set(P, T, validation_size = test_size, binary_column = 1)
    print("Length of training set: " + str(len(TP)))
    print("Length of test set: " + str(len(VP)))
    try:
        epochs = input("\nNumber of generations (1): ")
    except SyntaxError as e:
        epochs = 1
    com = build_feedforward_committee(comsize, len(P[0]), netsize, 1, output_function = 'linear')
    #1 is the column in the target array which holds the binary censoring information
    test_errors, vald_errors, data_sets = train_committee(com, train_evolutionary, P, T, 1, epochs, error_function = c_index_error, population_size = pop_size, mutation_chance = mutation_rate)
    com.set_training_sets([set[0][0] for set in data_sets]) #first 0 gives training sets, second 0 gives inputs.
    print('\nTest C_indices, Validation C_indices:')
    # Errors are inverse C-indices, so 1/err recovers the C-index.
    for terr, verr in zip(test_errors.values(), vald_errors.values()):
        print(str(1 / terr) + ", " + str(1 / verr))
    if plt:  # plt may be None if matplotlib could not be imported
        outputs = numpy.array([[com.risk_eval(inputs)] for inputs in TP]) #Need double brackets for dimensions to be right for numpy
        kaplanmeier(time_array = TT[:, 0], event_array = TT[:, 1], output_array = outputs[:, 0], threshold = 0.5)
        train_c_index = get_C_index(TT, outputs)
        print("\nC-index on the training set: " + str(train_c_index))
        if len(VP) > 0:
            outputs = numpy.array([[com.risk_eval(inputs)] for inputs in VP]) #Need double brackets for dimensions to be right for numpy
            test_c_index = get_C_index(VT, outputs)
            kaplanmeier(time_array = VT[:, 0], event_array = VT[:, 1], output_array = outputs[:, 0], threshold = 0.5)
            print("C-index on the test set: " + str(test_c_index))
        #raw_input("\nPress enter to show plots...")
        plt.show()
    try:
        answer = input("\nDo you wish to print committee risk output? ['n']: ")
    except (SyntaxError, NameError):
        answer = 'n'
    if answer != 'n' and answer != 'no':
        inputs = read_data_file(filename)
        P, T = parse_file(filename, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
        outputs = [[com.risk_eval(patient)] for patient in P]
        # Pad the output column with header rows so it lines up with the
        # raw file rows (which include e.g. the header line).
        while len(inputs) > len(outputs):
            outputs.insert(0, ["net_output"])
        print("\n")
        # Print each raw input row followed by the committee output, CSV-style.
        for rawline in zip(inputs, outputs):
            line = ''
            for col in rawline[0]:
                line += str(col)
                line += ','
            for col in rawline[1]:
                line += str(col)
            print(line)
def com_cross():
    """Repeated cross-validation of ANN committees on the survival data set.

    For each of ``times_to_cross`` repetitions, splits the data into
    ``comnum`` cross-validation folds, trains one committee per fold with the
    evolutionary algorithm, and prints training/validation C-index errors.
    The hidden layer size may be given as the first command line argument.
    """
    filename = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset.txt"
    #try:
    #    columns = input("Which columns to include? (Do NOT forget trailing comma if only one column is used, e.g. '3,'\nAvailable columns are: 2, -4, -3, -2, -1. Just press ENTER for all columns.\n")
    #except SyntaxError:
    #if len(sys.argv) < 3:
    columns = (2, -4, -3, -2, -1)
    #else:
    #    columns = [int(col) for col in sys.argv[2:]]
    print('\nIncluding columns: ' + str(columns))
    # Columns 4 and 5 hold survival time and the binary event indicator.
    P, T = parse_file(filename, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
    #remove tail censored
    #print('\nRemoving tail censored...')
    #P, T = copy_without_censored(P, T)
    #Divide into validation sets
    #test_size = 0.33
    #print('Size of test set (not used in training): ' + str(test_size))
    #((TP, TT), (VP, VT)) = get_validation_set(P, T, validation_size = test_size, binary_column = 1)
    print("\nData set:")
    print("Number of patients with events: " + str(T[:, 1].sum()))
    print("Number of censored patients: " + str((1 - T[:, 1]).sum()))
    #print("Length of training set: " + str(len(TP)))
    #print("Length of test set: " + str(len(VP)))
    if len(sys.argv) < 2:
        netsize = 1
    else:
        netsize = int(sys.argv[1])
    print("\nNumber of hidden nodes: " + str(netsize))
    comsize = 4
    print('Number of members in each committee: ' + str(comsize))
    comnum = 5
    print('Number of committees to cross-validate: ' + str(comnum))
    times_to_cross = 3
    print('Number of times to repeat cross-validation: ' + str(times_to_cross))
    pop_size = 100
    print("Population size: " + str(pop_size))
    mutation_rate = 0.05
    print("Mutation rate: " + str(mutation_rate))
    epochs = 100
    print("Epochs: " + str(epochs))
    for _cross_time in xrange(times_to_cross):
        # Fresh folds every repetition.
        data_sets = get_cross_validation_sets(P, T, comnum , binary_column = 1)
        print('\nTest Errors, Validation Errors:')
        for _com_num, (TS, VS) in zip(xrange(comnum), data_sets):
            com = build_feedforward_committee(comsize, len(P[0]), netsize, 1, output_function = 'linear')
            #1 is the column in the target array which holds the binary censoring information
            test_errors, vald_errors, internal_sets = train_committee(com, train_evolutionary, TS[0], TS[1], 1, epochs, error_function = c_index_error, population_size = pop_size, mutation_chance = mutation_rate)
            com.set_training_sets([set[0][0] for set in internal_sets]) #first 0 gives training sets, second 0 gives inputs.
            outputs = numpy.array([[com.risk_eval(inputs)] for inputs in TS[0]]) #Need double brackets for dimensions to be right for numpy
            train_c_index = get_C_index(TS[1], outputs)
            outputs = numpy.array([[com.risk_eval(inputs)] for inputs in VS[0]]) #Need double brackets for dimensions to be right for numpy
            val_c_index = get_C_index(VS[1], outputs)
            # Errors are inverse C-indices, so 1/c_index is the error value.
            print(str(1.0 / train_c_index) + ", " + str(1.0 / val_c_index))
# NOTE(review): this chunk begins MID-FUNCTION — the matching "try:" and the
# enclosing def are outside this view; it also ends mid-"__main__" block.
# Reconstruction of the visible statements only; confirm nesting against the
# full file.
    except FloatingPointError:
        # Numerical blow-up during training: keep the partially trained net.
        print('Aaawww....')
    outputs = net.sim(P)
    c_index = get_C_index(T, outputs)
    logger.info("C index = " + str(c_index))
    plot_network_weights(net)
    return net

if __name__ == "__main__":
    logging.basicConfig(level = logging.INFO)
    glogger.setLoggingLevel(glogger.debug)
    filename = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset.txt"
    # Columns 4 and 5 hold survival time and the binary event indicator.
    P, T = parse_file(filename, targetcols = [4, 5], inputcols = [2, -4, -3, -2, -1], ignorerows = [0], normalize = True)
    #P, T = parse_file(filename, targetcols = [4, 5], inputcols = [2, -3], ignorerows = [0], normalize = True)
    #Remove tail censored
    P, T = copy_without_tailcensored(P, T)
    #Limit to incourage overtraining!
    #rows = sample(range(len(T)), 100)
    #P = P[rows]
    #T = T[rows]
    p = len(P[0]) #number of input covariates
    #net = load_network('/home/gibson/jonask/Projects/aNeuralN/ANNs/4x10x10x1.ann')
    net = build_feedforward(p, 30, 1, output_function = 'linear')
# NOTE(review): this chunk begins with the tail of a function defined outside
# this view and ends mid-loop in the "__main__" block — the loop body
# continues beyond this chunk.  Confirm against the full file.
    return net

if __name__ == "__main__":
    logging.basicConfig(level = logging.INFO)
    glogger.setLoggingLevel(glogger.nothing)
    filename = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset.txt"
    #try:
    #    columns = input("Which columns to include? (Do NOT forget trailing comma if only one column is used, e.g. '3,'\nAvailable columns are: 2, -4, -3, -2, -1. Just press ENTER for all columns.\n")
    #except SyntaxError:
    columns = (2, -4, -3, -2, -1)
    print('\nIncluding columns: ' + str(columns))
    # Columns 4 and 5 hold survival time and the binary event indicator.
    P, T = parse_file(filename, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
    #remove tail censored
    #print('\nRemoving tail censored...')
    #P, T = copy_without_tailcensored(P, T)
    # Python 2 input(): bare ENTER raises SyntaxError -> default is used.
    try:
        pieces = input('Number of crossvalidation pieces? [1]: ')
    except SyntaxError as e:
        pieces = 1
    #Divide into validation sets
    TandV = get_cross_validation_sets(P, T, pieces , binary_column = 1)
    for set, ((tP, tT), (vP, vT)) in zip(range(pieces), TandV):
        print("\nCross validation set " + str(set))
        print("Training")
# NOTE(review): the statements before the "__main__" guard are the interior
# of a scatter-matrix plotting loop whose enclosing def/loop headers are
# outside this view — the nesting depth shown here is a guess; confirm
# against the full file.
        # Scatter covariate currentCol against covariate currentRow+1.
        scatter(P[:, currentCol], P[:, currentRow + 1], ax = ax, plotSlope = False)
        if currentCol == currentRow:
            ax.set_title(headers[currentCol])
        else:
            ax.set_title('')
        #ax.set_xlabel(headers[currentCol])
        if currentCol == 0:
            ax.set_ylabel(headers[currentRow+1])
        #Finish with this
        currentCol += 1

if __name__ == '__main__':
    from kalderstam.util.filehandling import parse_file
    filename = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/publication_data/Two_thirds_of_the_n4369_dataset_with_logs_lymf.txt"
    columns = ('age', 'log(1+lymfmet)', 'n_pos', 'tumsize', 'log(1+er_cyt)', 'log(1+pgr_cyt)', 'pgr_cyt_pos', 'er_cyt_pos', 'size_gt_20', 'er_cyt', 'pgr_cyt', 'time')
    #filename = "/home/gibson/jonask/Projects/DataMaker/hard_survival_test.txt"
    #columns = ('X0', 'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9')
    #targets = ['censtime', 'event']
    #columns = ('time', 'censtime', 'noisytime', 'censnoisytime')
    # No target columns: everything listed in `columns` is an input.
    targets = []
    P, T = parse_file(filename, targetcols = targets, inputcols = columns, normalize = True, separator = '\t', use_header = True)
    plt.figure()
    scatter_all_inputs(P, columns)
    plt.show()
def train_single():
    """Interactively train a single ANN on a chosen survival study.

    Prompts for hyper-parameters and study, trains via the module's ``test``
    helper on two cross-validation folds, then shows the glogger plots.

    NOTE(review): Python 2 ``input()`` evaluates the typed expression; a bare
    ENTER raises SyntaxError, which is caught to select the default value.
    """
    try:
        netsize = input('Number of hidden nodes? [3]: ')
    except SyntaxError as e:
        netsize = 3
    try:
        pop_size = input('Population size? [50]: ')
    except SyntaxError as e:
        pop_size = 50
    try:
        mutation_rate = input('Please input a mutation rate (0.25): ')
    except SyntaxError as e:
        mutation_rate = 0.25
    SB22 = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset_SB22.txt"
    Benmargskohorten = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset_Benmargskohorten.txt"
    SB91b = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset_SB91b.txt"
    all_studies = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset.txt" #Real data
    print("Studies to choose from:")
    print("1: SB22")
    print("2: Benmargskohorten")
    print("3: SB91b")
    print("0: All combined (default)")
    try:
        study = input("Which study to train on? [0]: ")
    except SyntaxError as e:
        study = 0
    if study == 1:
        filename = SB22
    elif study == 2:
        filename = Benmargskohorten
    elif study == 3:
        filename = SB91b
    else:
        filename = all_studies
    try:
        columns = input("Which columns to include? (Do NOT forget trailing comma if only one column is used, e.g. '3,'\nAvailable columns are: 2, -4, -3, -2, -1. Just press ENTER for all columns.\n")
    except SyntaxError:
        columns = (2, -4, -3, -2, -1)
    #P, T = parse_file(filename, targetcols = [4, 5], inputcols = [2, -4, -3, -2, -1], ignorerows = [0], normalize = True)
    # Columns 4 and 5 hold survival time and the binary event indicator.
    P, T = parse_file(filename, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
    #Used for output comparison
    studies = {}
    studies[SB22] = parse_file(SB22, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
    studies[Benmargskohorten] = parse_file(Benmargskohorten, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
    studies[SB91b] = parse_file(SB91b, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
    studies[all_studies] = parse_file(all_studies, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
    #remove tail censored
    #P, T = copy_without_tailcensored(P, T)
    #Divide into validation sets
    #((tP, tT), (vP, vT)) = get_validation_set(P, T, validation_size = 0.25, binary_column = 1)
    TandV = get_cross_validation_sets(P, T, 2 , binary_column = 1)
    #Network part
    p = len(P[0]) #number of input covariates
    net = build_feedforward(p, netsize, 1, output_function = 'linear')
    #net = build_feedforward_multilayered(p, [7, 10], 1, output_function = 'linear')
    try:
        epochs = input("Number of generations (200): ")
    except SyntaxError as e:
        epochs = 200
    for times, ((tP, tT), (vP, vT)) in zip(xrange(2), TandV):
        #train
        net = test(net, tP, tT, vP, vT, filename, epochs, population_size = pop_size, mutation_rate = mutation_rate)
    raw_input("Press enter to show plots...")
    glogger.show()
def cross_validation_test():
    """Repeatedly cross-validate an ANN committee on the survival data set.

    Trains a committee with the evolutionary algorithm and a C-index error,
    ``times_to_cross`` times, printing per-network test/validation errors.
    Hidden-node count may come from ``sys.argv[1]`` and input columns from
    ``sys.argv[2:]``.
    """
    filename = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset.txt"
    #try:
    #    columns = input("Which columns to include? (Do NOT forget trailing comma if only one column is used, e.g. '3,'\nAvailable columns are: 2, -4, -3, -2, -1. Just press ENTER for all columns.\n")
    #except SyntaxError:
    if len(sys.argv) < 3:
        columns = (2, -4, -3, -2, -1)
    else:
        columns = [int(col) for col in sys.argv[2:]]
    print('\nIncluding columns: ' + str(columns))
    # Columns 4 and 5 hold survival time and the binary event indicator.
    P, T = parse_file(filename, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
    #remove tail censored
    #print('\nRemoving tail censored...')
    #P, T = copy_without_censored(P, T)
    print("\nData set:")
    print("Number of patients with events: " + str(T[:, 1].sum()))
    print("Number of censored patients: " + str((1 - T[:, 1]).sum()))
    comsize = 5
    print('\nNumber of networks to cross-validate: ' + str(comsize))
    times_to_cross = 3
    print('\nNumber of times to repeat cross-validation: ' + str(times_to_cross))
    if len(sys.argv) < 2:
        netsize = 1
    else:
        netsize = int(sys.argv[1])
    print("Number of hidden nodes: " + str(netsize))
    pop_size = 100
    print("Population size: " + str(pop_size))
    mutation_rate = 0.05
    print("Mutation rate: " + str(mutation_rate))
    epochs = 400
    print("Epochs: " + str(epochs))
    for _ in xrange(times_to_cross):
        com = build_feedforward_committee(comsize, len(P[0]), netsize, 1, output_function = 'linear')
        #1 is the column in the target array which holds the binary censoring information
        test_errors, vald_errors, data_sets = train_committee(com, train_evolutionary, P, T, 1, epochs, error_function = c_index_error, population_size = pop_size, mutation_chance = mutation_rate)
        print('\nTest Errors, Validation Errors:')
        for terr, verr in zip(test_errors.values(), vald_errors.values()):
            print(str(terr) + ", " + str(verr))
def train_single():
    """Interactively train a single ANN with censoring cutoff and KM plots.

    Prompts for hyper-parameters and study, removes censored patients beyond
    a cutoff, trains via the module's ``test`` helper on cross-validation
    folds, draws Kaplan-Meier plots, and optionally writes network output to
    a file.

    NOTE(review): Python 2 ``input()`` evaluates the typed expression; a bare
    ENTER raises SyntaxError, which is caught to select the default value.
    """
    try:
        netsize = input('Number of hidden nodes? [1]: ')
    except SyntaxError as e:
        netsize = 1
    try:
        pop_size = input('Population size? [100]: ')
    except SyntaxError as e:
        pop_size = 100
    try:
        mutation_rate = input('Please input a mutation rate (0.05): ')
    except SyntaxError as e:
        mutation_rate = 0.05
    SB22 = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset_SB22.txt"
    Benmargskohorten = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset_Benmargskohorten.txt"
    SB91b = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset_SB91b.txt"
    all_studies = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset.txt" #Real data
    print("Studies to choose from:")
    print("1: SB22")
    print("2: Benmargskohorten")
    print("3: SB91b")
    print("0: All combined (default)")
    try:
        study = input("Which study to train on? [0]: ")
    except SyntaxError as e:
        study = 0
    if study == 1:
        filename = SB22
    elif study == 2:
        filename = Benmargskohorten
    elif study == 3:
        filename = SB91b
    else:
        filename = all_studies
    try:
        columns = input("Which columns to include? (Do NOT forget trailing comma if only one column is used, e.g. '3,'\nAvailable columns are: 2, -4, -3, -2, -1. Just press ENTER for all columns.\n")
    except SyntaxError:
        columns = (2, -4, -3, -2, -1)
    #P, T = parse_file(filename, targetcols = [4, 5], inputcols = [2, -4, -3, -2, -1], ignorerows = [0], normalize = True)
    # Columns 4 and 5 hold survival time and the binary event indicator.
    P, T = parse_file(filename, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
    #Used for output comparison
    studies = {}
    studies[SB22] = parse_file(SB22, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
    studies[Benmargskohorten] = parse_file(Benmargskohorten, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
    studies[SB91b] = parse_file(SB91b, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
    studies[all_studies] = parse_file(all_studies, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
    #remove tail censored
    try:
        cutoff = input('Cutoff for censored data? [9999 years]: ')
    except SyntaxError as e:
        cutoff = 9999
    P, T = copy_without_censored(P, T, cutoff)
    #Divide into validation sets
    try:
        pieces = input('Size of validation set? Input denominator (1 for no validation set). Default is 1/[1] parts: ')
    except:
        pieces = 1
    TandV = get_cross_validation_sets(P, T, pieces , binary_column = 1)
    #Network part
    p = len(P[0]) #number of input covariates
    net = build_feedforward(p, netsize, 1, output_function = 'linear')
    #net = build_feedforward_multilayered(p, [7, 10], 1, output_function = 'linear')
    #Initial state
    #outputs = net.sim(tP)
    #orderscatter(outputs, tT, filename, 's')
    try:
        epochs = input("Number of generations (1): ")
    except SyntaxError as e:
        epochs = 1
    for ((tP, tT), (vP, vT)) in TandV:
        #train
        net = test(net, tP, tT, vP, vT, filename, epochs, population_size = pop_size, mutation_rate = mutation_rate)
        if plt:  # plt may be None if matplotlib could not be imported
            outputs = net.sim(tP)
            # Threshold splits the training set into two equal-risk groups.
            threshold = kaplanmeier(time_array = tT[:, 0], event_array = tT[:, 1], output_array = outputs[:, 0])
            if len(vP) > 0:
                outputs = net.sim(vP)
                kaplanmeier(time_array = vT[:, 0], event_array = vT[:, 1], output_array = outputs[:, 0], threshold = threshold)
            print("\nThreshold dividing the training set in two equal pieces: " + str(threshold))
            raw_input("\nPress enter to show plots...")
            plt.show()
    try:
        answer = input("Do you wish to print network output? Enter filename, or 'no' / 'n'. ['n']: ")
    except (SyntaxError, NameError):
        answer = 'n'
    # Avoid clobbering an existing file by prefixing a random number.
    if os.path.exists(answer):
        print("File exists. Will add random number to front")
        answer = str(random.randint(0, 123456)) + answer
    if answer != 'n' and answer != 'no':
        print_output(answer, net, filename, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)