Exemplo n.º 1
0
def find_solution(P, T):
    """Train a small feed-forward network on (P, T) and plot the result.

    A network with 4 hidden nodes is built to match the width of the first
    rows of P and T. It is trained first with train_evolutionary and then
    with traingd, each wrapped in benchmark(). Finally the network is
    simulated on the complete input set P and the result is handed to
    plotroc and plot2d2c before the figures are shown.

    P -- input rows; len(P[0]) decides the number of input nodes.
    T -- target rows; len(T[0]) decides the number of output nodes.

    Nothing is returned.
    """
    n_inputs = len(P[0])
    n_outputs = len(T[0])
    network = build_feedforward(input_number = n_inputs, hidden_number = 4, output_number = n_outputs)

    # The first part is what the trainers fit on, the second (1 % of the data)
    # is passed along as their validation argument.
    # NOTE(review): roles inferred from argument order -- confirm in get_validation_set.
    fit_part, check_part = get_validation_set(P, T, validation_size = 0.01)

    print("Training...")
    run_evolutionary = benchmark(train_evolutionary)
    network = run_evolutionary(network, fit_part, check_part, 100, random_range = 1)

    gd_epochs = 1000
    run_gradient_descent = benchmark(traingd)
    network = run_gradient_descent(network, fit_part, check_part, gd_epochs, learning_rate = 0.1, block_size = 1)

    # Evaluate on everything that was passed in, not only on one of the parts.
    predictions = network.sim(P)
    # plotroc returns (area, best_cut); only the cut is needed for the next plot.
    _, chosen_cut = plotroc(predictions, T, 1)
    plot2d2c(network, P, T, figure = 2, cut = chosen_cut)

    plt.show()
def _read_answer(prompt):
    """Show *prompt* and return the user's reply as stripped text.

    raw_input is preferred where it exists (Python 2), because Python 2's
    input() passes the reply through eval(). On Python 3 raw_input is gone
    and input() already returns plain text.
    """
    try:
        reader = raw_input  # Python 2
    except NameError:
        reader = input  # Python 3
    return reader(prompt).strip()


def _ask_value(prompt, default, convert):
    """Prompt for a setting; an empty reply selects *default*.

    A non-empty reply is returned as convert(reply). A ValueError raised by
    *convert* is not caught here and propagates to the caller.
    """
    reply = _read_answer(prompt)
    if not reply:
        return default
    return convert(reply)


def _parse_number(text):
    """Convert *text* to an int when possible, otherwise to a float."""
    try:
        return int(text)
    except ValueError:
        return float(text)


def _parse_columns(text):
    """Turn a reply such as '2, -4, -1', '3,' or '(2, -4)' into a tuple of ints.

    Raises ValueError when the reply holds no column number at all or when
    an entry is not an integer.
    """
    tokens = text.strip('()[] ').split(',')
    columns = tuple(int(token) for token in tokens if token.strip())
    if not columns:
        raise ValueError("no column numbers given: %r" % (text,))
    return columns


def committee_test():
    """Interactively train a network committee on the survival data set.

    The user is asked for the network size, committee size, population size,
    mutation rate, input columns, censoring cutoff, test set fraction and
    number of generations. Pressing ENTER at a prompt selects the default
    shown in that prompt. The data file is parsed, censored rows are removed
    with copy_without_censored, and a test set is split off. A committee is
    then built and trained with train_evolutionary using c_index_error, on
    the training part only.

    Afterwards the per-network errors are printed as their reciprocals and,
    when plt is available, C-indices are printed and Kaplan-Meier plots are
    drawn for the training set and (if non-empty) the test set. Optionally the
    committee's risk output for every row of the data file is printed.

    Nothing is returned.

    Raises ValueError if a non-empty reply to a numeric or column prompt
    cannot be parsed (the test set fraction is the exception: an unparsable
    reply there falls back to 0.0).
    """
    netsize = _ask_value('Number of hidden nodes? [1]: ', 1, int)
    comsize = _ask_value('Committee size? [1]: ', 1, int)
    pop_size = _ask_value('Population size? [100]: ', 100, int)
    mutation_rate = _ask_value('Please input a mutation rate (0.05): ', 0.05, float)

    filename = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset.txt"

    columns = _ask_value("Which columns to include? (Do NOT forget trailing comma if only one column is used, e.g. '3,'\nAvailable columns are: 2, -4, -3, -2, -1. Just press ENTER for all columns.\n",
                         (2, -4, -3, -2, -1), _parse_columns)

    P, T = parse_file(filename, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)

    #remove tail censored
    cutoff = _ask_value('Cutoff for censored data? [9999 years]: ', 9999, _parse_number)
    P, T = copy_without_censored(P, T, cutoff)

    #Divide into validation sets
    try:
        test_size = _ask_value('Size of test set (not used in training)? Input in fractions. Default is [0.0]: ', 0.0, float)
    except ValueError:
        # An unparsable fraction has always meant "no test set" at this prompt.
        # Only ValueError is caught so that Ctrl-C still aborts the program.
        test_size = 0.0
    ((TP, TT), (VP, VT)) = get_validation_set(P, T, validation_size = test_size, binary_column = 1)
    print("Length of training set: " + str(len(TP)))
    print("Length of test set: " + str(len(VP)))

    epochs = _ask_value("\nNumber of generations (1): ", 1, int)

    com = build_feedforward_committee(comsize, len(P[0]), netsize, 1, output_function = 'linear')

    #1 is the column in the target array which holds the binary censoring information
    # Train on the training part only (TP, TT). Passing the full P, T here would
    # let the committee see the held-out test set that is evaluated further down.
    test_errors, vald_errors, data_sets = train_committee(com, train_evolutionary, TP, TT, 1, epochs, error_function = c_index_error, population_size = pop_size, mutation_chance = mutation_rate)

    com.set_training_sets([data_set[0][0] for data_set in data_sets]) #first 0 gives training sets, second 0 gives inputs.

    print('\nTest C_indices, Validation C_indices:')
    for terr, verr in zip(test_errors.values(), vald_errors.values()):
        # The errors are printed as reciprocals, i.e. as C-indices per the heading above.
        print(str(1 / terr) + ", " + str(1 / verr))

    # NOTE(review): plt is presumably falsy when matplotlib could not be imported -- confirm at the import site.
    if plt:
        outputs = numpy.array([[com.risk_eval(inputs)] for inputs in TP]) #Need double brackets for dimensions to be right for numpy
        kaplanmeier(time_array = TT[:, 0], event_array = TT[:, 1], output_array = outputs[:, 0], threshold = 0.5)
        train_c_index = get_C_index(TT, outputs)
        print("\nC-index on the training set: " + str(train_c_index))
        if len(VP) > 0:
            outputs = numpy.array([[com.risk_eval(inputs)] for inputs in VP]) #Need double brackets for dimensions to be right for numpy
            test_c_index = get_C_index(VT, outputs)
            kaplanmeier(time_array = VT[:, 0], event_array = VT[:, 1], output_array = outputs[:, 0], threshold = 0.5)
            print("C-index on the test set: " + str(test_c_index))

        plt.show()

    # ENTER means the advertised default 'n'. Quotes are stripped because the
    # prompt used to require a quoted string when replies were evaluated.
    answer = _read_answer("\nDo you wish to print committee risk output? ['n']: ").strip('\'"').lower() or 'n'

    if answer not in ('n', 'no'):
        inputs = read_data_file(filename)
        # Re-parse the whole file so that every row gets an output,
        # including rows removed by the censoring cutoff above.
        all_patients, _ = parse_file(filename, targetcols = [4, 5], inputcols = columns, ignorerows = [0], normalize = True)
        outputs = [[com.risk_eval(patient)] for patient in all_patients]
        # The raw file has more rows than the parsed data (ignored rows such as
        # the header), so pad the front with a column title for each extra row.
        padding = [["net_output"] for _ in range(len(inputs) - len(outputs))]
        outputs = padding + outputs

        print("\n")
        for raw_row, risk_row in zip(inputs, outputs):
            # Every raw column is followed by a comma, then the output is appended.
            prefix = ''.join(str(col) + ',' for col in raw_row)
            print(prefix + ''.join(str(col) for col in risk_row))