def plot_features(subject, data_path, model_path, test_labels, dataset='test'):
    with open(model_path + '/' + subject + '.pickle', 'rb') as f:
        state_dict = pickle.load(f)
    cnn = ConvNet(state_dict['params'])
    cnn.set_weights(state_dict['weights'])
    scalers = state_dict['scalers']

    if dataset == 'test':
        d = load_test_data(data_path, subject)
        x = d['x']
        y = test_labels['preictal']
    elif dataset == 'train':
        d = load_train_data(data_path, subject)
        x, y = d['x'], d['y']
    else:
        raise ValueError("dataset must be 'test' or 'train'")

    x, _ = scale_across_time(x, x_test=None, scalers=scalers) if state_dict['params']['scale_time'] \
        else scale_across_features(x, x_test=None, scalers=scalers)

    cnn.batch_size.set_value(x.shape[0])
    get_features = theano.function([cnn.x, Param(cnn.training_mode, default=0)], cnn.feature_extractor.output,
                                 allow_input_downcast=True)

    logits_test = get_features(x)
    model = TSNE(n_components=2, random_state=0)
    z = model.fit_transform(np.float64(logits_test))
    plt.scatter(z[:, 0], z[:, 1], s=60, c=y)
    plt.show()
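A minimal usage sketch (the subject name and paths are hypothetical; test_labels only needs a 'preictal' column, e.g. a frame shaped like the submission that predict() writes below):

import pandas as pd

# hypothetical paths; any object indexable by 'preictal' works as test_labels
test_labels = pd.read_csv('submissions/Dog_1.csv')
plot_features('Dog_1', data_path='data', model_path='models',
              test_labels=test_labels, dataset='test')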
Example #2
def predict(subject, data_path, model_path, submission_path):
    patient_filenames = [
        filename for filename in os.listdir(model_path)
        if subject in filename and filename.endswith('.pickle')
    ]
    for filename in patient_filenames:
        print(filename)

        d = load_test_data(data_path, subject)
        x, clip_ids = d['x'], d['id']  # renamed to avoid shadowing the built-in id()

        with open(model_path + '/' + filename, 'rb') as f:
            state_dict = pickle.load(f)

        scalers = state_dict['scalers']
        x, _ = scale_across_time(x, x_test=None, scalers=scalers) if state_dict['params']['scale_time'] \
            else scale_across_features(x, x_test=None, scalers=scalers)

        cnn = ConvNet(state_dict['params'])
        cnn.set_weights(state_dict['weights'])
        test_proba = cnn.get_test_proba(x)

        ans = list(zip(clip_ids, test_proba))

        df = DataFrame(data=ans, columns=['clip', 'preictal'])
        csv_name = '.'.join(
            filename.split('.')[:-1]) if '.' in filename else filename
        df.to_csv(submission_path + '/' + csv_name + '.csv',
                  index=False,
                  header=True)
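A sketch of how predict might be driven for several subjects; the subject names and directory layout are assumptions, not taken from the repository:

# hypothetical subject list and directories
for s in ['Dog_1', 'Dog_2', 'Patient_1']:
    predict(s, data_path='data', model_path='models', submission_path='submissions')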
Example #3
def plot_train_probs(subject, data_path, model_path):
    with open(model_path + '/' + subject + '.pickle', 'rb') as f:
        state_dict = pickle.load(f)
    cnn = ConvNet(state_dict['params'])
    cnn.set_weights(state_dict['weights'])
    scalers = state_dict['scalers']

    d = load_train_data(data_path, subject)
    x, y = d['x'], d['y']

    x, _ = scale_across_time(x, x_test=None, scalers=scalers) if state_dict['params']['scale_time'] \
        else scale_across_features(x, x_test=None, scalers=scalers)

    cnn.batch_size.set_value(x.shape[0])
    probs = cnn.get_test_proba(x)

    fpr, tpr, threshold = roc_curve(y, probs)
    c = np.sqrt((1 - tpr)**2 + fpr**2)
    opt_threshold = threshold[np.where(c == np.min(c))[0]]
    print(opt_threshold)

    x_coords = np.zeros(len(y), dtype='float64')
    rng = np.random.RandomState(42)
    x_coords += rng.normal(0.0, 0.08, size=len(x_coords))
    plt.scatter(x_coords, probs, c=y, s=60)
    plt.title(subject)
    plt.show()
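The threshold selection above picks the point on the ROC curve closest to the ideal corner (FPR = 0, TPR = 1). A self-contained sketch of the same rule on toy labels and scores:

import numpy as np
from sklearn.metrics import roc_curve

y_true = np.array([0, 0, 1, 1, 1, 0, 1, 0])
scores = np.array([0.1, 0.4, 0.35, 0.8, 0.7, 0.2, 0.9, 0.6])

fpr, tpr, thr = roc_curve(y_true, scores)
dist = np.sqrt((1 - tpr) ** 2 + fpr ** 2)  # distance to the (0, 1) corner
print('optimal threshold:', thr[np.argmin(dist)])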
Example #6
def train(subject, data_path, model_path, model_params, validation_params):
    d = load_train_data(data_path, subject)
    x, y, filename_to_idx = d['x'], d['y'], d['filename_to_idx']
    x_test = load_test_data(data_path,
                            subject)['x'] if model_params['use_test'] else None

    # --------- add params
    model_params['n_channels'] = x.shape[1]
    model_params['n_fbins'] = x.shape[2]
    model_params['n_timesteps'] = x.shape[3]

    print('============ parameters')
    for key, value in model_params.items():
        print(key, ':', value)
    print('========================')

    x_train, y_train = None, None
    x_valid, y_valid = None, None

    if model_params['overlap']:
        # no validation if overlap
        with open('filenames.pickle', 'rb') as f:
            filenames_grouped_by_hour = pickle.load(f)
        data_grouped_by_hour = load_grouped_train_data(
            data_path, subject, filenames_grouped_by_hour)
        x, y = generate_overlapped_data(data_grouped_by_hour,
                                        overlap_size=model_params['overlap'],
                                        window_size=x.shape[-1],
                                        overlap_interictal=True,
                                        overlap_preictal=True)
        print(x.shape)

        x, scalers = scale_across_time(x, x_test=None) if model_params['scale_time'] \
            else scale_across_features(x, x_test=None)

        cnn = ConvNet(model_params)
        cnn.train(train_set=(x, y), max_iter=175000)
        state_dict = cnn.get_state()
        state_dict['scalers'] = scalers
        with open(model_path + '/' + subject + '.pickle', 'wb') as f:
            pickle.dump(state_dict, f, protocol=pickle.HIGHEST_PROTOCOL)
        return
    else:
        if validation_params['random_split']:
            # legacy sklearn.cross_validation API; see the equivalent sketch after this function
            skf = StratifiedShuffleSplit(y, n_iter=1, test_size=0.25, random_state=0)
            for train_idx, valid_idx in skf:
                x_train, y_train = x[train_idx], y[train_idx]
                x_valid, y_valid = x[valid_idx], y[valid_idx]
        else:
            with open('filenames.pickle', 'rb') as f:
                filenames_grouped_by_hour = pickle.load(f)
            d = split_train_valid_filenames(subject, filenames_grouped_by_hour)
            train_filenames = d['train_filenames']
            valid_filenames = d['valid_filnames']  # (sic) key name as returned by split_train_valid_filenames
            train_idx = [filename_to_idx[i] for i in train_filenames]
            valid_idx = [filename_to_idx[i] for i in valid_filenames]
            x_train, y_train = x[train_idx], y[train_idx]
            x_valid, y_valid = x[valid_idx], y[valid_idx]

    if model_params['scale_time']:
        x_train, scalers_train = scale_across_time(x=x_train, x_test=x_test)
        x_valid, _ = scale_across_time(x=x_valid,
                                       x_test=x_test,
                                       scalers=scalers_train)
    else:
        x_train, scalers_train = scale_across_features(x=x_train,
                                                       x_test=x_test)
        x_valid, _ = scale_across_features(x=x_valid,
                                           x_test=x_test,
                                           scalers=scalers_train)

    del x, x_test

    print('============ dataset')
    print('train:', x_train.shape)
    print('n_pos:', np.sum(y_train), 'n_neg:', len(y_train) - np.sum(y_train))
    print('valid:', x_valid.shape)
    print('n_pos:', np.sum(y_valid), 'n_neg:', len(y_valid) - np.sum(y_valid))

    # -------------- validate
    cnn = ConvNet(model_params)
    best_iter = cnn.validate(train_set=(x_train, y_train),
                             valid_set=(x_valid, y_valid),
                             valid_freq=validation_params['valid_freq'],
                             max_iter=validation_params['max_iter'],
                             fname_out=model_path + '/' + subject + '.txt')

    # ---------------- scale
    d = load_train_data(data_path, subject)
    x, y, filename_to_idx = d['x'], d['y'], d['filename_to_idx']
    x_test = load_test_data(data_path,
                            subject)['x'] if model_params['use_test'] else None

    x, scalers = scale_across_time(x=x, x_test=x_test) if model_params['scale_time'] \
        else scale_across_features(x=x, x_test=x_test)
    del x_test

    cnn = ConvNet(model_params)
    cnn.train(train_set=(x, y), max_iter=best_iter)
    state_dict = cnn.get_state()
    state_dict['scalers'] = scalers
    with open(model_path + '/' + subject + '.pickle', 'wb') as f:
        pickle.dump(state_dict, f, protocol=pickle.HIGHEST_PROTOCOL)
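The random split above uses the legacy sklearn.cross_validation.StratifiedShuffleSplit(y, n_iter=1, ...) interface. An equivalent sketch with the current sklearn.model_selection API (an assumption about the reader's sklearn version, shown on toy data):

import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

x = np.random.randn(100, 16, 6, 10)  # toy array shaped like (n_clips, n_channels, n_fbins, n_timesteps)
y = np.array([0] * 70 + [1] * 30)    # toy binary labels

sss = StratifiedShuffleSplit(n_splits=1, test_size=0.25, random_state=0)
train_idx, valid_idx = next(sss.split(x, y))
x_train, y_train = x[train_idx], y[train_idx]
x_valid, y_valid = x[valid_idx], y[valid_idx]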
Example #8
def initial_cifar():
    # initial cifar net
    cnn = ConvNet()

    conv1_params = {
        'HF': 5,
        'WF': 5,
        'DF': 3,
        'NF': 32,
        'stride': 1,
        'pad': 2,
        'var': 0.01
    }
    cnn.add_layer('conv', conv1_params)
    pooling1_params = {'HF': 3, 'WF': 3, 'stride': 2, 'pad': [0, 1, 0, 1]}
    cnn.add_layer('max_pooling', pooling1_params)
    cnn.add_layer('relu', {})

    conv2_params = {
        'HF': 5,
        'WF': 5,
        'DF': 32,
        'NF': 32,
        'stride': 1,
        'pad': 2,
        'var': 0.02
    }
    cnn.add_layer('conv', conv2_params)
    cnn.add_layer('relu', {})
    pooling2_params = {'HF': 3, 'WF': 3, 'stride': 2, 'pad': [0, 1, 0, 1]}
    cnn.add_layer('max_pooling', pooling2_params)

    conv3_params = {
        'HF': 5,
        'WF': 5,
        'DF': 32,
        'NF': 64,
        'stride': 1,
        'pad': 2,
        'var': 0.03
    }
    cnn.add_layer('conv', conv3_params)
    cnn.add_layer('relu', {})
    pooling3_params = {'HF': 3, 'WF': 3, 'stride': 2, 'pad': [0, 1, 0, 1]}
    cnn.add_layer('max_pooling', pooling3_params)

    conv4_params = {
        'HF': 4,
        'WF': 4,
        'DF': 64,
        'NF': 64,
        'stride': 1,
        'pad': 0,
        'var': 0.04
    }
    cnn.add_layer('conv', conv4_params)
    cnn.add_layer('relu', {})

    conv5_params = {
        'HF': 1,
        'WF': 1,
        'DF': 64,
        'NF': 10,
        'stride': 1,
        'pad': 0,
        'var': 0.05
    }
    cnn.add_layer('conv', conv5_params)

    cnn.add_layer('softmax-loss', {})

    return cnn
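Assuming a 32x32x3 CIFAR-10 input, the spatial sizes can be checked with out = (in + pad_total - filter) // stride + 1; the asymmetric pooling pad [0, 1, 0, 1] adds one pixel per spatial dimension:

def out_size(n, f, stride, pad_total):
    # output size of a conv / pooling layer along one spatial dimension
    return (n + pad_total - f) // stride + 1

n = 32
n = out_size(n, 5, 1, 4)  # conv1, pad 2 on each side -> 32
n = out_size(n, 3, 2, 1)  # pool1 -> 16
n = out_size(n, 5, 1, 4)  # conv2 -> 16
n = out_size(n, 3, 2, 1)  # pool2 -> 8
n = out_size(n, 5, 1, 4)  # conv3 -> 8
n = out_size(n, 3, 2, 1)  # pool3 -> 4
n = out_size(n, 4, 1, 0)  # conv4 -> 1
n = out_size(n, 1, 1, 0)  # conv5 -> 1 (with NF=10 class scores feeding softmax-loss)
print(n)                  # 1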
Example #9
def initial_LeNet():
    # initial LeNet
    cnn = ConvNet()

    conv1_params = {
        'HF': 5,
        'WF': 5,
        'DF': 1,
        'NF': 20,
        'stride': 1,
        'pad': 0,
        'var': 0.01
    }
    cnn.add_layer('conv', conv1_params)

    pooling1_params = {'HF': 2, 'WF': 2, 'stride': 2, 'pad': 0}
    cnn.add_layer('max_pooling', pooling1_params)

    conv2_params = {
        'HF': 5,
        'WF': 5,
        'DF': 20,
        'NF': 50,
        'stride': 1,
        'pad': 0,
        'var': 0.01
    }
    cnn.add_layer('conv', conv2_params)

    pooling2_params = {'HF': 2, 'WF': 2, 'stride': 2, 'pad': 0}
    cnn.add_layer('max_pooling', pooling2_params)

    conv3_params = {
        'HF': 4,
        'WF': 4,
        'DF': 50,
        'NF': 500,
        'stride': 1,
        'pad': 0,
        'var': 0.01
    }
    cnn.add_layer('conv', conv3_params)

    cnn.add_layer('relu', {})

    conv4_params = {
        'HF': 1,
        'WF': 1,
        'DF': 500,
        'NF': 10,
        'stride': 1,
        'pad': 0,
        'var': 0.01
    }
    cnn.add_layer('conv', conv4_params)

    cnn.add_layer('softmax-loss', {})

    return cnn
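Assuming a 28x28x1 MNIST input, the same arithmetic for this LeNet variant (conv3 then acts as a fully connected layer with 500 outputs, and conv4 produces the 10 class scores):

sizes = [28]
for f, s in [(5, 1), (2, 2), (5, 1), (2, 2), (4, 1), (1, 1)]:  # conv1, pool1, conv2, pool2, conv3, conv4 (all pad 0)
    sizes.append((sizes[-1] - f) // s + 1)
print(sizes)  # [28, 24, 12, 8, 4, 1, 1]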
Example #10
File: test.py  Project: IraKorshunova/CNN
print('activation:', activation)
print('====================')

#path = '/mnt/storage/usr/ikorshun/data/data08_npy/'
path = '../data/data' + patient + '_npy/'
files = glob.glob(path + 'X_*.npy')
files = [f.split('/')[-1] for f in files]
p = re.compile(r'\d+')
file_nums = [p.findall(f)[0] for f in files]
file_nums = np.asarray(file_nums, dtype='int32')


test_nums = np.asarray([4], dtype='int32')
out_file = open('out.txt', 'w')

rng = np.random.RandomState(424242)
for i in file_nums:
    print('test', i)
    test_set = DatasetsLoader.load(path, i)
    sets = DatasetsLoader.get_train_valid_set(path, file_nums[file_nums != i], rng)
    train_set = sets['train']
    valid_set = sets['valid']

    cnn = ConvNet(nkerns, recept_width, pool_width, dropout_prob, batch_size, activation)
    opt_iters = cnn.validate(train_set, valid_set, init_learning_rate, max_iters, validation_frequency,
                             improvement_threshold)
    cnn = ConvNet(nkerns, recept_width, pool_width, dropout_prob, batch_size, activation)
    train_set = np.concatenate((train_set[0], valid_set[0])), np.concatenate((train_set[1], valid_set[1]))
    cnn.test(train_set, test_set, init_learning_rate, init_learning_rate / max_iters, opt_iters, out_file)

out_file.close()
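For reference, the regular expression above extracts the recording number from each file name; a quick check on made-up names:

import re

p = re.compile(r'\d+')
for name in ['X_1.npy', 'X_12.npy', 'X_4.npy']:  # hypothetical file names
    print(name, '->', int(p.findall(name)[0]))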
Example #11
def initial_cifar():
    # initial cifar net
    cnn = ConvNet()

    conv1_params = {'HF': 5, 'WF': 5, 'DF': 3, 'NF': 32, 'stride': 1, 'pad': 2, 'var': 0.01}
    cnn.add_layer('conv', conv1_params)
    pooling1_params = {'HF': 3, 'WF': 3, 'stride': 2, 'pad': [0, 1, 0, 1]}
    cnn.add_layer('max_pooling', pooling1_params)
    cnn.add_layer('relu', {})

    conv2_params = {'HF': 5, 'WF': 5, 'DF': 32, 'NF': 32, 'stride': 1, 'pad': 2, 'var': 0.02}
    cnn.add_layer('conv', conv2_params)
    cnn.add_layer('relu', {})
    pooling2_params = {'HF': 3, 'WF': 3, 'stride': 2, 'pad': [0, 1, 0, 1]}
    cnn.add_layer('max_pooling', pooling2_params)

    conv3_params = {'HF': 5, 'WF': 5, 'DF': 32, 'NF': 64, 'stride': 1, 'pad': 2, 'var': 0.02}
    cnn.add_layer('conv', conv3_params)
    cnn.add_layer('relu', {})
    pooling3_params = {'HF': 3, 'WF': 3, 'stride': 2, 'pad': [0, 1, 0, 1]}
    cnn.add_layer('max_pooling', pooling3_params)

    conv4_params = {'HF': 4, 'WF': 4, 'DF': 64, 'NF': 64, 'stride': 1, 'pad': 0, 'var': 0.02}
    cnn.add_layer('conv', conv4_params)
    cnn.add_layer('relu', {})

    conv5_params = {'HF': 1, 'WF': 1, 'DF': 64, 'NF': 10, 'stride': 1, 'pad': 0, 'var': 0.02}
    cnn.add_layer('conv', conv5_params)

    cnn.add_layer('softmax-loss', {})

    return cnn
Example #12
def initial_LeNet():
    # initial LeNet
    cnn = ConvNet()

    conv1_params = {'HF': 5, 'WF': 5, 'DF': 1, 'NF': 20, 'stride': 1, 'pad': 0, 'var': 0.01}
    cnn.add_layer('conv', conv1_params)

    pooling1_params = {'HF': 2, 'WF': 2, 'stride': 2, 'pad': 0}
    cnn.add_layer('max_pooling', pooling1_params)

    conv2_params = {'HF': 5, 'WF': 5, 'DF': 20, 'NF': 50, 'stride': 1, 'pad': 0, 'var': 0.01}
    cnn.add_layer('conv', conv2_params)

    pooling2_params = {'HF': 2, 'WF': 2, 'stride': 2, 'pad': 0}
    cnn.add_layer('max_pooling', pooling2_params)

    conv3_params = {'HF': 4, 'WF': 4, 'DF': 50, 'NF': 500, 'stride': 1, 'pad': 0, 'var': 0.01}
    cnn.add_layer('conv', conv3_params)

    cnn.add_layer('relu', {})

    conv4_params = {'HF': 1, 'WF': 1, 'DF': 500, 'NF': 10, 'stride': 1, 'pad': 0, 'var': 0.01}
    cnn.add_layer('conv', conv4_params)

    cnn.add_layer('softmax-loss', {})

    return cnn
Example #13
File: test.py  Project: Coderx7/CNN-1
files = glob.glob(path + 'X_*.npy')
files = [f.split('/')[-1] for f in files]
p = re.compile(r'\d+')
file_nums = [p.findall(f)[0] for f in files]
file_nums = np.asarray(file_nums, dtype='int32')

test_nums = np.asarray([4], dtype='int32')
out_file = open('out.txt', 'w')

rng = np.random.RandomState(424242)
for i in file_nums:
    print('test', i)
    test_set = DatasetsLoader.load(path, i)
    sets = DatasetsLoader.get_train_valid_set(path, file_nums[file_nums != i],
                                              rng)
    train_set = sets['train']
    valid_set = sets['valid']

    cnn = ConvNet(nkerns, recept_width, pool_width, dropout_prob, batch_size,
                  activation)
    opt_iters = cnn.validate(train_set, valid_set, init_learning_rate,
                             max_iters, validation_frequency,
                             improvement_threshold)
    cnn = ConvNet(nkerns, recept_width, pool_width, dropout_prob, batch_size,
                  activation)
    train_set = np.concatenate((train_set[0], valid_set[0])), np.concatenate(
        (train_set[1], valid_set[1]))
    cnn.test(train_set, test_set, init_learning_rate,
             init_learning_rate / max_iters, opt_iters, out_file)

out_file.close()