import warnings

import numpy as np
from tqdm import tqdm

# load_preprocessed / matrix_sparsity_info are assumed to live in this
# project's local helpers module
from utils import load_preprocessed, matrix_sparsity_info


def get_connections(site1, site2):
    ''' return mask of rows which are fragments common to both sites '''
    return (~np.isnan(site1)) & (~np.isnan(site2))


def get_best_f1_ind(precision, recall):
    ''' return the index of the threshold that maximises the F1 score '''
    fscore = (2 * precision * recall) / (precision + recall)
    return np.argmax(fscore)


if __name__ == '__main__':
    # load preprocessed data
    fragments, qualities, variant_labels = load_preprocessed(
        'data/preprocessed/chr20_1-1M.npz'
    )
    matrix_sparsity_info(fragments, print_info=True)

    # get undirected edge weights
    min_connections = 2
    edges = np.ma.masked_all((fragments.shape[1], fragments.shape[1]))
    # promote warnings to errors to catch pairs where phi is undefined and
    # there should therefore be no edge
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        # iterate over the strict lower triangle: n * (n - 1) / 2 pairs
        for i, j in tqdm(zip(*np.tril_indices(fragments.shape[1], k=-1)),
                         desc='edges',
                         total=fragments.shape[1] * (fragments.shape[1] - 1) // 2):
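            # The loop body is not shown in this excerpt. A minimal sketch of
            # one plausible body follows, assuming `edges[i, j]` holds the phi
            # coefficient between sites i and j computed over the fragments
            # covering both; the binary 0/1 allele encoding is an assumption
            # of this sketch, not taken from the original.
            site_i, site_j = fragments[:, i], fragments[:, j]
            conn = get_connections(site_i, site_j)
            if conn.sum() < min_connections:
                continue
            try:
                a, b = site_i[conn], site_j[conn]
                # 2x2 contingency table of allele co-occurrence
                n11 = np.sum((a == 1) & (b == 1))
                n10 = np.sum((a == 1) & (b == 0))
                n01 = np.sum((a == 0) & (b == 1))
                n00 = np.sum((a == 0) & (b == 0))
                edges[i, j] = ((n11 * n00 - n10 * n01)
                               / np.sqrt((n11 + n10) * (n01 + n00)
                                         * (n11 + n01) * (n10 + n00)))
            except RuntimeWarning:
                # phi undefined (a zero marginal); leave this edge masked
                continue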
    if thresh_ind < len(thresholds):
        thresh = -thresholds[thresh_ind]
    else:
        thresh = 0
    preds = scores > thresh
    print(preds.sum())
    return confusion_matrix(variant_labels, preds, normalize=normalize), None


if __name__ == '__main__':
    # load preprocessed data
    _, _, variant_labels = load_preprocessed(
        'data/preprocessed/chr20_1-1M.npz')

    save_dir = 'data/results'

    # original
    res_saves = [
        ('phi_correlation_1M.npy', 'phi correlation'),
        ('likelihood_w1_1M.npy', 'likelihood w=1'),
        ('likelihood_w2_1M.npy', 'likelihood w=2'),
        ('likelihood_w3_1M.npy', 'likelihood w=3'),
        ('likelihood_w4_1M.npy', 'likelihood w=4'),
        ('likelihood_w5_1M.npy', 'likelihood w=5'),
    ]
    results = [(np.load(os.path.join(save_dir, s[0])), s[1])
               for s in res_saves]
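    # A minimal sketch of one plausible continuation (not shown in the
    # original): compare each saved score array against `variant_labels`
    # with a precision-recall curve. The plotting choices are assumptions.
    from sklearn.metrics import precision_recall_curve
    import matplotlib.pyplot as plt

    for scores, label in results:
        precision, recall, _ = precision_recall_curve(variant_labels, scores)
        plt.plot(recall, precision, label=label)
    plt.xlabel('recall')
    plt.ylabel('precision')
    plt.legend()
    plt.show()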
    parser.add_argument('-n', '--n_splits', type=int, action='store',
                        default=10)
    return parser


if __name__ == "__main__":
    scaler = None
    parser = gen_arg_parser()
    args = parser.parse_args()

    merged_data = load_preprocessed(args.data_dir_root,
                                    simple=args.merge_nrem,
                                    merge_keys=['stages', 'pows'])

    if args.scaler:
        scaler = StandardScaler()
        scaler.fit(merged_data['pows'])

    if args.mode == 'merged':
        data = merged_data
    else:
        # only the merged mode is implemented; fail loudly rather than fall
        # through with `data` undefined
        raise NotImplementedError(f'mode {args.mode!r} is not implemented')

    if args.merge_nrem:
        sleep_stages = data['stages_simple']
    else:
        sleep_stages = data['stages'][:, 2].astype(np.int8)
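    # A minimal sketch of one plausible continuation (not in the original):
    # the --n_splits flag suggests k-fold cross-validation over the power
    # features. The fold strategy and variable names here are assumptions.
    from sklearn.model_selection import StratifiedKFold

    pows = data['pows'] if scaler is None else scaler.transform(data['pows'])
    skf = StratifiedKFold(n_splits=args.n_splits, shuffle=True, random_state=0)
    for train_ind, test_ind in skf.split(pows, sleep_stages):
        X_train, y_train = pows[train_ind], sleep_stages[train_ind]
        X_test, y_test = pows[test_ind], sleep_stages[test_ind]
        # ...fit and score a classifier on each fold...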
import os

import numpy as np
import scipy.io as sio
from sklearn.preprocessing import StandardScaler

from nn_models import basic_rnn
from utils import rolling_window, load_preprocessed

# Load preprocessed files
all_data = load_preprocessed()
data = all_data[0]


def timestep_slice_data(data, slice_size=10, rescale=True):
    # Load inputs and outputs
    labels = data['stages'][:, 2]
    pows = data['pows']

    if rescale:
        # standardise each power feature before windowing
        scaler = StandardScaler()
        scaler.fit(pows)
        pows = scaler.transform(pows)
    pows = pows.swapaxes(0, 1)

    # time-slice the labels -> shape (N, slice_size)
    seq_labels = rolling_window(labels, slice_size)

    # time-slicing pows is awkward: the window is rolled along the last
    # (time) axis, so move the axes back to (N, slice_size, n_features)
    seq_pows = rolling_window(pows, slice_size)
    seq_pows = seq_pows.swapaxes(0, 1)
    seq_pows = seq_pows.swapaxes(1, 2)
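# `rolling_window` is imported from the local `utils` module and not shown in
# this excerpt. A minimal sketch of a typical implementation, assuming it
# windows along the last axis (consistent with the axis-swapping above) and
# NumPy >= 1.20; the name `rolling_window_sketch` is illustrative.
def rolling_window_sketch(a, window):
    # for an input whose last axis has length N, returns overlapping views
    # of shape (..., N - window + 1, window)
    return np.lib.stride_tricks.sliding_window_view(a, window, axis=-1)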