def _train(df, bhv_df, args):
    """Cross-validate the GRU regressor with a between-subject k-fold split.

    Only the first ``args.train_size`` unique subjects of
    ``bhv_df['Subject']`` take part. They are grouped by their class label
    in ``bhv_df['y']`` and each class is split separately with the same
    ``KFold`` object, so every fold has a nearly identical train/val ratio
    in all classes. A fresh model is fit for every fold.

    Args:
        df: frame holding the 'feat*' feature columns together with the
            'c' (clip label) and 'timepoint' columns. It is also handed to
            ``_get_seq`` to build the sequences.
        bhv_df: frame with a 'Subject' column and a 'y' class column.
        args: options object; this function reads ``train_size``,
            ``k_fold``, ``k_class``, ``k_hidden``, ``k_layers``, ``l2``,
            ``dropout``, ``lr``, ``batch_size`` and ``num_epochs``.

    Returns:
        dict with, for every entry of ``SCORES``:
          * 'train_<score>' / 'val_<score>': arrays of length ``k_fold``;
          * 't_train_<score>' / 't_val_<score>': per-clip dicts of arrays
            shaped ``(k_fold, clip_time[clip])``;
        plus 'y', 'y_hat' and 'c', each a dict keyed by fold index with the
        validation outputs of ``dnn_score``.
    """
    # get X-y from df (the feature columns are only counted for the log)
    feature = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(feature)
    print('number of features = %d' % (k_feat))
    k_clip = len(np.unique(df['c']))
    print('number of clip = %d' % (k_clip))

    subject_list = bhv_df['Subject'].unique()
    train_list = subject_list[:args.train_size]

    # length of each clip; the loop looks clips up as 0 .. k_clip - 1
    clip_time = np.zeros(k_clip)
    for ii in range(k_clip):
        class_df = df[df['c'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    _info('seq lengths = %s' % clip_time)

    # init dict for all results
    results = {}

    # true and predicted scores and clip label
    results['y'] = {}
    results['y_hat'] = {}
    results['c'] = {}

    for score in SCORES:
        # mean scores across time
        results['train_%s' % score] = np.zeros(args.k_fold)
        results['val_%s' % score] = np.zeros(args.k_fold)

        # per clip temporal score
        results['t_train_%s' % score] = {}
        results['t_val_%s' % score] = {}
        for ii in range(k_clip):
            results['t_train_%s' % score][ii] = np.zeros(
                (args.k_fold, clip_time[ii]))
            results['t_val_%s' % score][ii] = np.zeros(
                (args.k_fold, clip_time[ii]))

    # The folds are contiguous and unshuffled. `random_state` only has an
    # effect together with shuffle=True; passing it alone is rejected with a
    # ValueError by scikit-learn >= 0.24 (older releases ignored it), so it
    # is omitted here to keep the exact same folds on every version.
    kf = KFold(n_splits=args.k_fold)

    # get participant lists for each assigned class
    # ensure they're only in train_list
    in_train = bhv_df['Subject'].isin(train_list)
    class_list = {}
    for ii in range(args.k_class):
        class_list[ii] = bhv_df[in_train & (bhv_df['y'] == ii)][
            'Subject'].values
        print('No. of participants in class {} = {}'.format(
            ii, len(class_list[ii])))

    # split participants in each class with kf:
    # nearly identical ratio of train and val, in all classes.
    # kf.split returns a generator; one (train, val) pair is drawn per fold.
    split = {}
    for ii in range(args.k_class):
        split[ii] = kf.split(class_list[ii])

    for i_fold in range(args.k_fold):
        _info('fold: %d/%d' % (i_fold + 1, args.k_fold))

        # ***between-subject train-val split
        train_subs, val_subs = [], []
        for ii in range(args.k_class):
            train, val = next(split[ii])
            train_subs.extend(class_list[ii][train])
            val_subs.extend(class_list[ii][val])

        # model main
        X_train, train_len, y_train, c_train = _get_seq(
            df, train_subs, args)
        X_val, val_len, y_val, c_val = _get_seq(
            df, val_subs, args)

        # longest training sequence; reused when scoring the val data too
        max_length = tf.reduce_max(train_len)

        # train regression model
        then = time.time()
        model = GRURegressor(X_train,
                             k_hidden=args.k_hidden,
                             k_layers=args.k_layers,
                             l2=args.l2,
                             dropout=args.dropout,
                             lr=args.lr)

        # targets get a trailing singleton axis for fitting
        model.fit(X_train,
                  y_train.reshape(y_train.shape[0], y_train.shape[1], 1),
                  batch_size=args.batch_size,
                  epochs=args.num_epochs,
                  verbose=1,
                  validation_split=0.2)

        print('--- train time =  %0.4f seconds ---' % (time.time() - then))

        # results on train data
        # NOTE(review): `model_type` is not defined in this function;
        # presumably a module-level name - confirm it exists.
        s, s_t, _, _, _ = dnn_score(model, X_train, y_train, c_train,
                                    train_len, max_length, clip_time,
                                    model_type=model_type)
        for score in SCORES:
            results['train_%s' % score][i_fold] = s[score]
            for ii in range(k_clip):
                results['t_train_%s' % score][ii][i_fold] = s_t[ii][score]
        print('train p = %0.3f' % s['p'])

        # results on val data
        s, s_t, y, y_hat, c = dnn_score(model, X_val, y_val, c_val,
                                        val_len, max_length, clip_time,
                                        model_type=model_type)
        for score in SCORES:
            results['val_%s' % score][i_fold] = s[score]
            for ii in range(k_clip):
                results['t_val_%s' % score][ii][i_fold] = s_t[ii][score]
        print('val p = %0.3f' % s['p'])

        results['y'][i_fold] = y
        results['y_hat'][i_fold] = y_hat
        results['c'][i_fold] = c

    return results
def _test(df, bhv_df, args):
    """Fit the GRU regressor on the training subjects and score held-out ones.

    The unique subjects of ``bhv_df['Subject']`` are cut at
    ``args.train_size``: the first part is used for fitting, the remainder
    for testing. One model is trained and then scored on both parts.

    Args:
        df: frame holding the 'feat*' feature columns together with the
            'c' (clip label) and 'timepoint' columns. It is also handed to
            ``_get_seq`` to build the sequences.
        bhv_df: frame with a 'Subject' column.
        args: options object; this function reads ``train_size``,
            ``k_hidden``, ``k_layers``, ``l2``, ``dropout``, ``lr``,
            ``batch_size`` and ``num_epochs``.

    Returns:
        dict with, for every entry of ``SCORES``, the overall
        'train_<score>' / 'test_<score>' values and the per-clip
        't_train_<score>' / 't_test_<score>' dicts, plus 'y', 'y_hat' and
        'c' as returned by ``dnn_score`` for the test data.
    """
    _info('test mode')

    # feature columns are collected only to report how many there are
    feat_cols = [col for col in df.columns if 'feat' in col]
    n_feat = len(feat_cols)
    print('number of features = %d' % (n_feat))

    n_clip = len(np.unique(df['c']))
    print('number of clips = %d' % (n_clip))

    # between-subject split by position in the unique subject list
    subjects = bhv_df['Subject'].unique()
    train_list = subjects[:args.train_size]
    test_list = subjects[args.train_size:]

    # number of timepoints per clip (largest timepoint index + 1);
    # clips are looked up as 0 .. n_clip - 1
    clip_time = np.zeros(n_clip)
    for clip in range(n_clip):
        clip_rows = df[df['c'] == clip]
        last_timepoint = np.max(np.unique(clip_rows['timepoint']))
        clip_time[clip] = last_timepoint + 1
    # values come out of the frame as floats, lengths must be ints
    clip_time = clip_time.astype(int)
    _info('seq lengths = %s' % clip_time)

    # pre-allocate the per-clip temporal score containers
    results = {}
    for score in SCORES:
        results['t_train_%s' % score] = {
            clip: np.zeros(clip_time[clip]) for clip in range(n_clip)
        }
        results['t_test_%s' % score] = {
            clip: np.zeros(clip_time[clip]) for clip in range(n_clip)
        }

    # build train and test sequences
    X_train, train_len, y_train, c_train = _get_seq(df, train_list, args)
    X_test, test_len, y_test, c_test = _get_seq(df, test_list, args)

    # longest training sequence; also used when scoring the test data
    max_length = tf.reduce_max(train_len)

    # fit the regression model (timer starts before model construction)
    then = time.time()
    model = GRURegressor(
        X_train,
        k_hidden=args.k_hidden,
        k_layers=args.k_layers,
        l2=args.l2,
        dropout=args.dropout,
        lr=args.lr,
    )

    # targets get a trailing singleton axis for fitting
    n_seq, n_step = y_train.shape[0], y_train.shape[1]
    y_train_fit = y_train.reshape(n_seq, n_step, 1)
    model.fit(
        X_train,
        y_train_fit,
        batch_size=args.batch_size,
        epochs=args.num_epochs,
        verbose=1,
        validation_split=0.2,
    )
    elapsed = time.time() - then
    print('--- train time =  %0.4f seconds ---' % (elapsed))

    # score on the training data
    # NOTE(review): `model_type` is not defined in this function;
    # presumably a module-level name - confirm it exists.
    s, s_t, _, _, _ = dnn_score(
        model, X_train, y_train, c_train,
        train_len, max_length, clip_time,
        model_type=model_type)
    for score in SCORES:
        results['train_%s' % score] = s[score]
        per_clip = results['t_train_%s' % score]
        for clip in range(n_clip):
            per_clip[clip] = s_t[clip][score]
    print('train p = %0.3f' % s['p'])

    # score on the held-out test data
    s, s_t, y, y_hat, c = dnn_score(
        model, X_test, y_test, c_test,
        test_len, max_length, clip_time,
        model_type=model_type)
    for score in SCORES:
        results['test_%s' % score] = s[score]
        per_clip = results['t_test_%s' % score]
        for clip in range(n_clip):
            per_clip[clip] = s_t[clip][score]
    print('test p = %0.3f' % s['p'])

    # true targets, predictions and clip labels of the test data
    results['y'] = y
    results['y_hat'] = y_hat
    results['c'] = c

    return results