def compute_fold(ARG):
    # Train and evaluate a single cross-validation fold.
    #
    # NOTE(review): this appears to be a module-level duplicate of the
    # compute_fold nested inside make_cv_on_current_set (possibly hoisted
    # so multiprocessing can pickle it).  At module level it relies on
    # data_stuff, N_inputs, input_shape, config and logger existing as
    # module-level names -- those are not visible in this chunk; confirm
    # they are bound before this function is called.
    #
    # ARG: a (train_indices, val_indices) pair, as yielded by the CV splitter.
    # Returns: (fit history dict, metric name string).
    cv_train_idx, cv_val_idx = ARG
    # Slice the training and validation folds out of the full dataset.
    train = slice_data(data_stuff, cv_train_idx, N_inputs)
    val = slice_data(data_stuff, cv_val_idx, N_inputs)
    # Fresh model per fold; make_model also returns the metric name
    # (used elsewhere as 'val_%s' % metric).
    model, metric = make_model(input_shape, data_stuff[1].shape, config['model_params'])
    logger.info('Model build')
    # train is (x, y, sample_weight)-like; val[:2] supplies (x, y) for validation.
    result = model.fit(train[0], train[1], sample_weight=train[2],
                       validation_data=val[:2], **config['training_cfg'])
    return result.history, metric
def make_cv_on_current_set(data_stuff, indices_to_use, config):
    '''On current set, perform cv and extract model, score and best iteration.

    Parameters
    ----------
    data_stuff : sequence of (x, y, sample_weight)-like arrays consumed by
        slice_data; data_stuff[1] is the target array (its .shape is fed to
        make_model).
    indices_to_use : array of sample indices to cross-validate over.
    config : dict with 'model_params' and 'training_cfg' entries.

    Returns
    -------
    dict with keys:
        'train' : (mean, std) of the training metric across folds
        'val'   : (mean, std) of the validation metric across folds
        'it'    : best epoch index (argmax of the mean validation metric)
        'model' : final model trained on all indices_to_use, with weights
                  taken from the best epoch via SaveSelected
    '''
    N_inputs, input_shape = get_input_shapes(data_stuff, config)

    def compute_fold(fold_indices):
        # Train one CV fold; returns (fit history dict, metric name).
        cv_train_idx, cv_val_idx = fold_indices
        train = slice_data(data_stuff, cv_train_idx, N_inputs)
        val = slice_data(data_stuff, cv_val_idx, N_inputs)
        model, metric = make_model(input_shape, data_stuff[1].shape, config['model_params'])
        logger.info('Model build')
        result = model.fit(train[0], train[1], sample_weight=train[2],
                           validation_data=val[:2], **config['training_cfg'])
        return result.history, metric

    splits = pu_cv_splitter(indices_to_use, data_stuff[1])
    if args.nproc <= 1:
        history = [compute_fold(split) for split in splits]
    else:
        # NOTE(review): nested functions are generally not picklable by the
        # default multiprocessing pickler -- confirm this parallel path
        # actually works, or move compute_fold to module level.
        pool = Pool(args.nproc)
        try:
            history = pool.map(compute_fold, splits)
        finally:
            # Fix: the pool was previously only `del`eted, which leaks the
            # worker processes; shut it down explicitly.
            pool.close()
            pool.join()
    metric = history[0][1]  # metric name is identical for every fold
    history = [fold[0] for fold in history]
    # ====== Average ===============
    cv_av, cv_std = make_av_std(history, metric)
    cv_val_av, cv_val_std = make_av_std(history, 'val_%s' % metric)
    best_cv_idx = cv_val_av.argmax()  # epoch with the best mean validation score
    # ====== final model on the whole current set ===========
    logger.info('Making Final model')
    train_stuff = slice_data(data_stuff, indices_to_use, N_inputs)
    model, metric = make_model(input_shape, data_stuff[1].shape, config['model_params'])
    logger.info('Model build')
    # SaveSelected presumably snapshots the weights at epoch best_cv_idx and
    # reset() restores them into `model` -- confirm against its definition.
    saver = SaveSelected(best_cv_idx)
    result = model.fit(train_stuff[0], train_stuff[1], sample_weight=train_stuff[2],
                       callbacks=[saver], **config['training_cfg'])
    saver.reset()
    return {
        'train': (cv_av, cv_std),
        'val': (cv_val_av, cv_val_std),
        'it': best_cv_idx,
        'model': model
    }
def extract_reliable_negatives_fuselier(model, data_stuff, negative_idx, th=0.5, yshape=2, N_inputs=1):
    '''Score candidate negatives with `model` and keep the low-scoring ones.

    Returns a tuple of (indices of reliable negatives, stats over the
    predicted scores, count of candidates scoring above 0.5).
    '''
    candidates = slice_data(data_stuff, negative_idx, N_inputs)
    drugability = model.predict(candidates[0])
    if yshape == 2:
        # Two-column output: column 1 holds the positive-class score.
        drugability = drugability[:, 1]
    # Positions (within negative_idx) whose score falls below the threshold.
    below_th = np.where(drugability < th)[0]
    reliable = negative_idx[below_th]
    # NOTE(review): this count uses a fixed 0.5 cut-off rather than `th` --
    # looks intentional (a fixed reference point), but worth confirming.
    high_score_count = np.where(drugability > 0.5, 1, 0).sum()
    return reliable, make_stats_from_vector(drugability), high_score_count
batch_size = 100 ## add config?? later?? epochs = args.epochs history = [] N_inputs = config['model_params'].get('num_inputs', 1) if N_inputs == 1: input_shape = x.shape[1] else: input_shape = [arr.shape for arr in x] data_shapes = config.get('data_shapes', 'none') if data_shapes != 'none': input_shape = data_shapes for cv_train_idx, cv_val_idx in cv_splits: train = slice_data(data_stuff, cv_train_idx, N_inputs) val = slice_data(data_stuff, cv_val_idx, N_inputs) if is_multitask: logger.info('Train Y: %s' % str(np.sum(train[1], axis=1))) for i, vy in enumerate(train[1]): key = 'out%i' % i train[2][key] = balance_masked_weights(vy, train[2][key]) val[2][key] = balance_masked_weights(val[1][i], val[2][key]) else: train[2] = scale_weights(train[1], train[2], args.scale_positive) val[2] = scale_weights(val[1], val[2], args.scale_positive) logger.info('Train Y: %s' % str(np.sum(train[1], axis=0))) model, metric = make_model(input_shape, np.shape(y), config['model_params']) logger.info('Model build') result = model.fit(train[0],