def survival_stat(filename, thresholds=None):
    """Print C-index statistics for a data file and show its Kaplan-Meier plot.

    The file is read as comma-separated values and parsed with columns 2
    through 10 as inputs, skipping row 0 and without normalization.  Columns
    (2, 3) of the parsed array are handed to ``get_C_index`` as the targets
    and columns (-1, 3) as the outputs.  The C-index, its inverse (labelled
    "Genetic error") and the threshold returned by ``kaplanmeier`` are
    printed.  If ``plt`` is truthy the pending figures are shown.

    filename   -- path of the data file to analyse.
    thresholds -- forwarded unchanged to ``kaplanmeier`` as ``threshold``.
    """
    raw_table = np.array(read_data_file(filename, ","))

    # parse_data returns a pair; only the first element is used here.
    parsed, _unused_targets = parse_data(
        raw_table,
        inputcols=tuple(range(2, 11)),
        ignorerows=[0],
        normalize=False,
    )

    target_columns = parsed[:, (2, 3)]
    output_columns = parsed[:, (-1, 3)]
    c_index = get_C_index(target_columns, output_columns)

    c_index_message = "C-index: " + str(c_index)
    genetic_error_message = "Genetic error: " + str(1 / c_index)
    print(c_index_message)
    print(genetic_error_message)

    # NOTE(review): the positional arguments 2, 3, -1 presumably name the
    # time, event and output columns of the parsed array -- confirm against
    # kaplanmeier's signature.
    split_threshold = kaplanmeier(parsed, 2, 3, -1, threshold=thresholds)
    print("Threshold dividing the set in two equal pieces: " + str(split_threshold))

    if plt:
        plt.show()
def _prompt_value(message, default):
    """Ask the user for a value, returning *default* for an empty or malformed reply.

    On Python 2 the built-in ``input`` evaluates the typed text and raises
    ``SyntaxError`` for an empty line; on Python 3 it returns the raw text.
    Both cases are handled: a ``SyntaxError`` or an empty string yields
    *default*, and a text reply is parsed as a Python literal (number, tuple,
    ...) with ``ast.literal_eval``.  Other errors (for example ``NameError``
    on Python 2 or ``ValueError`` from a non-literal reply) propagate.
    """
    import ast  # local import so the helper does not rely on file-level imports

    try:
        # NOTE(review): on Python 2 ``input`` evaluates whatever is typed as
        # code.  Only run this interactively with trusted users.
        reply = input(message)
        if isinstance(reply, str):
            reply = reply.strip()
            if not reply:
                return default
            reply = ast.literal_eval(reply)
    except SyntaxError:
        return default
    return reply


def committee_test():
    """Interactively train a feed-forward committee and report its C-indices.

    The user is asked for the network size, committee size, population size,
    mutation rate, input columns, censoring cutoff, test-set fraction and
    number of generations; every prompt falls back to the default shown in
    its text.  The data file is parsed, filtered with
    ``copy_without_censored`` and split with ``get_validation_set``.  A
    committee is trained with ``train_evolutionary`` using ``c_index_error``
    on the training part of the split only, so that the test part is truly
    held out as the prompt promises.

    After training, the inverse of every test/validation error is printed.
    If ``plt`` is truthy, Kaplan-Meier plots and C-indices are produced for
    the training set and, when non-empty, for the test set.  Finally the
    committee's risk output for every row of the file can optionally be
    printed as comma-separated lines.
    """
    netsize = _prompt_value('Number of hidden nodes? [1]: ', 1)
    comsize = _prompt_value('Committee size? [1]: ', 1)
    pop_size = _prompt_value('Population size? [100]: ', 100)
    mutation_rate = _prompt_value('Please input a mutation rate (0.05): ', 0.05)

    filename = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset.txt"

    columns = _prompt_value(
        "Which columns to include? (Do NOT forget trailing comma if only one column is used, e.g. '3,'\n"
        "Available columns are: 2, -4, -3, -2, -1. Just press ENTER for all columns.\n",
        (2, -4, -3, -2, -1),
    )
    P, T = parse_file(filename, targetcols=[4, 5], inputcols=columns, ignorerows=[0], normalize=True)

    # Remove tail censored
    cutoff = _prompt_value('Cutoff for censored data? [9999 years]: ', 9999)
    P, T = copy_without_censored(P, T, cutoff)

    # Divide into validation sets.  Any unparsable reply falls back to 0.0,
    # but interrupts (Ctrl-C) are no longer swallowed.
    try:
        test_size = float(_prompt_value(
            'Size of test set (not used in training)? Input in fractions. Default is [0.0]: ', 0.0))
    except (NameError, TypeError, ValueError):
        test_size = 0.0
    ((TP, TT), (VP, VT)) = get_validation_set(P, T, validation_size=test_size, binary_column=1)
    print("Length of training set: " + str(len(TP)))
    print("Length of test set: " + str(len(VP)))

    epochs = _prompt_value("\nNumber of generations (1): ", 1)

    com = build_feedforward_committee(comsize, len(P[0]), netsize, 1, output_function='linear')

    # Train on the training split only: passing the full (P, T) would feed
    # the held-out test rows to the trainer, contradicting the prompt above.
    # 1 is the column in the target array which holds the binary censoring
    # information.
    test_errors, vald_errors, data_sets = train_committee(
        com, train_evolutionary, TP, TT, 1, epochs,
        error_function=c_index_error,
        population_size=pop_size,
        mutation_chance=mutation_rate,
    )
    # First 0 gives training sets, second 0 gives inputs.
    com.set_training_sets([data_set[0][0] for data_set in data_sets])

    print('\nTest C_indices, Validation C_indices:')
    for terr, verr in zip(test_errors.values(), vald_errors.values()):
        print(str(1 / terr) + ", " + str(1 / verr))

    if plt:
        # Need double brackets for dimensions to be right for numpy
        outputs = numpy.array([[com.risk_eval(patient)] for patient in TP])
        kaplanmeier(time_array=TT[:, 0], event_array=TT[:, 1], output_array=outputs[:, 0], threshold=0.5)
        train_c_index = get_C_index(TT, outputs)
        print("\nC-index on the training set: " + str(train_c_index))
        if len(VP) > 0:
            # Need double brackets for dimensions to be right for numpy
            outputs = numpy.array([[com.risk_eval(patient)] for patient in VP])
            test_c_index = get_C_index(VT, outputs)
            kaplanmeier(time_array=VT[:, 0], event_array=VT[:, 1], output_array=outputs[:, 0], threshold=0.5)
            print("C-index on the test set: " + str(test_c_index))

        plt.show()

    # This prompt expects plain text, so it is not parsed as a literal.  On
    # Python 2 an unquoted word raises NameError, which is treated as "no".
    try:
        answer = input("\nDo you wish to print committee risk output? ['n']: ")
    except (SyntaxError, NameError):
        answer = 'n'
    if isinstance(answer, str) and not answer.strip():
        answer = 'n'  # empty reply on Python 3 means "use the default"

    if answer not in ('n', 'no'):
        raw_rows = read_data_file(filename)
        # Re-parse the file: P above has had censored rows removed.
        all_inputs, _ = parse_file(filename, targetcols=[4, 5], inputcols=columns, ignorerows=[0], normalize=True)
        risk_rows = [[com.risk_eval(patient)] for patient in all_inputs]

        # Pad the front so that ignored rows (e.g. the header) line up with a
        # column title instead of a risk value.
        missing = len(raw_rows) - len(risk_rows)
        if missing > 0:
            risk_rows = [["net_output"] for _ in range(missing)] + risk_rows

        print("\n")
        for raw_row, risk_row in zip(raw_rows, risk_rows):
            prefix = ''.join(str(col) + ',' for col in raw_row)
            suffix = ''.join(str(col) for col in risk_row)
            print(prefix + suffix)