def TrainingModels_Changgeng(target_label, model_file_name):
    '''Train and save a RandomWalker model from the loader's P_faillist records.

    For every record name in ``ecg.P_faillist`` the manual label file
    ``data/labels/<target_label>/<record>_poslist.json`` (below
    ``current_folderpath``) is read when it exists.  Its ``poslist`` entries
    are paired with ``target_label`` and passed, together with the first
    element returned by ``ecg.load``, to ``walker.collect_training_data``.

    * A record whose label file exists but has an empty ``poslist`` is skipped.
    * A record without a label file is still collected, with no marks.

    CP: Characteristic points.

    NOTE(review): a later definition with the same name appears in this file;
    if both stay at module level, that later one is the one callers get.

    Args:
        target_label: label to train for; also the name of the label
            sub-folder under ``data/labels``.
        model_file_name: path handed to ``walker.save_model``.  The walker is
            also given ``random_pattern.json`` in the same directory as its
            ``random_pattern_file_name``.
    '''
    from changgengLoader import ECGLoader

    ecg = ECGLoader(500, current_folderpath)
    walker = RandomWalker(
        target_label=target_label,
        random_forest_config=dict(max_depth=10),
        random_pattern_file_name=os.path.join(
            os.path.dirname(model_file_name), 'random_pattern.json'))
    label_folder = os.path.join(current_folderpath, 'data', 'labels',
                                target_label)

    start_time = time.time()
    for record_name in ecg.P_faillist:
        CP_file_name = os.path.join(label_folder,
                                    '%s_poslist.json' % record_name)
        CP_marks = []

        # Add manual labels if possible
        if os.path.exists(CP_file_name):
            print('Collecting features from record %s.' % record_name)
            with open(CP_file_name, 'r') as fin:
                poslist = json.load(fin)['poslist']
            if not poslist:
                continue
            CP_marks = [(pos, target_label) for pos in poslist]

        # Load by record name so the signal matches the label file above.
        # NOTE(review): the original passed the loop index here, while the
        # other code in this file that reads P_faillist entries passes the
        # entry itself to ecg.load -- confirm against ECGLoader.load.
        sig = ecg.load(record_name)
        walker.collect_training_data(sig[0], CP_marks)

    print('random forest start training(%s)...' % target_label)
    walker.training()
    print('trianing used %.3f seconds' % (time.time() - start_time))

    start_time = time.time()
    walker.save_model(model_file_name)
    print('Serializing model time cost %f' % (time.time() - start_time))
def TrainingModels_Changgeng(target_label, model_file_name):
    '''Train and save a RandomWalker model from the annotated label files.

    Every ``*.json`` file in ``data/labels/<target_label>`` (below
    ``current_folderpath``) names one record, except the records in the
    hard-coded fail list, which are skipped.  For each remaining record:

    * ``poslist`` is read from the label file and every position is halved,
      matching the signal, which is downsampled by 2 with
      ``scipy.signal.resample_poly`` before training;
    * a record with an empty ``poslist`` is skipped;
    * the first element returned by ``ecg.load(record_name)`` is resampled and
      passed with the marks to ``walker.collect_training_data``.

    Records are processed in sorted name order so that repeated runs feed the
    walker in the same order.

    CP: Characteristic points.

    Args:
        target_label: label to train for; also the name of the label
            sub-folder under ``data/labels``.
        model_file_name: path handed to ``walker.save_model``.  The walker is
            also given ``random_pattern.json`` in the same directory as its
            ``random_pattern_file_name``.
    '''
    import glob
    import scipy.signal
    from changgengLoader import ECGLoader

    label_folder = os.path.join(current_folderpath, 'data', 'labels',
                                target_label)

    # Record name = file name without its final extension, so that
    # '<name>.json' below always points back at the file it came from.
    annot_jsonIDs = set(
        os.path.splitext(os.path.basename(json_path))[0]
        for json_path in glob.glob(os.path.join(label_folder, '*.json')))

    # skip failed records
    faillist = set(str(x) for x in [
        8999, 8374, 6659, 6655, 6059, 5395, 1401, 1269, 737, 75, 9524, 9476
    ])
    annot_jsonIDs = sorted(annot_jsonIDs - faillist)

    ecg = ECGLoader(500, current_folderpath)
    walker = RandomWalker(
        target_label=target_label,
        random_forest_config=dict(max_depth=10),
        random_pattern_file_name=os.path.join(
            os.path.dirname(model_file_name), 'random_pattern.json'))

    start_time = time.time()
    for record_name in annot_jsonIDs:
        CP_file_name = os.path.join(label_folder, '%s.json' % record_name)
        CP_marks = []

        # Add manual labels if possible
        if os.path.exists(CP_file_name):
            with open(CP_file_name, 'r') as fin:
                CP_info = json.load(fin)
            # Halve the annotated positions: the signal is downsampled by 2.
            poslist = [int(x / 2) for x in CP_info['poslist']]
            if not poslist:
                continue
            CP_marks = [(pos, target_label) for pos in poslist]

        print('Collecting features from record %s.' % record_name)
        sig = ecg.load(record_name)
        raw_sig = scipy.signal.resample_poly(sig[0], 1, 2)
        walker.collect_training_data(raw_sig, CP_marks)

    print('random forest start training(%s)...' % target_label)
    walker.training()
    print('trianing used %.3f seconds' % (time.time() - start_time))

    start_time = time.time()
    walker.save_model(model_file_name)
    print('Serializing model time cost %f' % (time.time() - start_time))
def TestChanggeng(record_ind, model_folder=None, pattern_file_name=None):
    '''Run the 'P' random-walk model on one P_faillist record and plot it.

    The record ``ecg.P_faillist[record_ind]`` is loaded, its first channel is
    downsampled by 2 with ``scipy.signal.resample_poly``, and every model
    returned by ``GetModels`` whose label is ``'P'`` is run from seed
    positions spaced 200 samples apart.  The figure shows the signal, the
    predicted positions, the walk paths and the annotations returned by
    ``ecg.loadAnnot`` (positions halved to match the downsampled signal).

    The function ends in ``pdb.set_trace()`` after a non-blocking
    ``plt.show`` (presumably to keep the figure open for inspection).

    Args:
        record_ind: index into ``ecg.P_faillist``.
        model_folder: folder passed to ``GetModels``; defaults to the path
            that was previously hard-coded here.
        pattern_file_name: random pattern file passed to ``GetModels``;
            defaults to the path that was previously hard-coded here.
    '''

    def RunWalkerModel(walker_model, seed_positions, confined_ranges,
                       feature_extractor):
        '''Run random walk detection model.

        Input:
            walker_model: random walk regressor for a certain label.
            seed_positions: list of seed position
            confined_ranges: list of confined_range
            feature_extractor: extractor handed to runPreparedTesting.
        Returns:
            (results, path_list): results is a list of
            (predict_position, walker_model.target_label) tuples, one per
            path in path_list.
        Raises:
            Exception: if the enclosing fs is not 250.0.
        '''
        if abs(fs - 250.0) > 1e-6:
            raise Exception('Bias has default fs = 250.0Hz!')
        print('fs =  %s' % fs)

        # First add to prepare testing list
        for seed_position, confined_range in zip(seed_positions,
                                                 confined_ranges):
            walker_model.prepareTestSample(seed_position, confined_range)

        # Second, Testing all prepared positions
        path_list, scores_list = walker_model.runPreparedTesting(
            feature_extractor, iterations=200, stepsize=4)

        results = []
        for path in path_list:
            # Prediction = mean of the second half of the walk.
            # '//' keeps the slice index an integer on Python 3 as well.
            predict_position = int(
                np.mean(path[len(path) // 2:]) / 250.0 * fs)
            results.append((predict_position, walker_model.target_label))
        return (results, path_list)

    import random
    import matplotlib.pyplot as plt
    import scipy.signal
    from changgengLoader import ECGLoader

    fs = 250.0
    if model_folder is None:
        model_folder = '/home/chenbin/hyf/Sourecode/Sourecode/ECG_random_walk/randomwalk/data/Lw3Np4000/improved'
    if pattern_file_name is None:
        pattern_file_name = '/home/chenbin/hyf/Sourecode/Sourecode/ECG_random_walk/randomwalk/data/Lw3Np4000/random_pattern.json'

    ecg = ECGLoader(500, current_folderpath)
    record_name = ecg.P_faillist[record_ind]
    sig = ecg.load(record_name)
    raw_sig = scipy.signal.resample_poly(sig[0], 1, 2)

    model_list = GetModels(model_folder, pattern_file_name)

    start_time = time.time()
    # Start Testing
    results = []
    path_list = []
    feature_extractor = model_list[0][0].GetFeatureExtractor(raw_sig)
    for walker_model, bias, model_label in model_list:
        if model_label != 'P':
            continue
        print('Testing model label: %s' % model_label)

        seeds = list(range(1, len(raw_sig), 200))
        confined_ranges = [[0, len(raw_sig) - 1] for _ in seeds]
        seed_results, model_paths = RunWalkerModel(
            walker_model, seeds, confined_ranges, feature_extractor)
        results.extend(seed_results)
        # Accumulate paths so that path_list[i] always belongs to results[i].
        path_list.extend(model_paths)
    print('Testing time cost %f secs.' % (time.time() - start_time))

    # Display results
    plt.figure(1)
    plt.clf()
    plt.plot(raw_sig, label='ECG')

    # Keep each 'P' prediction together with the path that produced it.
    p_hits = [(int(pos), path)
              for (pos, label), path in zip(results, path_list)
              if label == 'P']
    pos_list = [pos for pos, _ in p_hits]
    amp_list = [raw_sig[pos] for pos in pos_list]
    if pos_list:
        plt.plot(pos_list,
                 amp_list,
                 marker='o',
                 linestyle='none',
                 markeredgewidth=5,
                 markersize=15,
                 alpha=0.85,
                 markerfacecolor='none',
                 markeredgecolor=(
                     random.random(),
                     random.random(),
                     random.random(),
                 ),
                 label='P')

    # Plot path: x follows the walk; y is a ramp starting at the amplitude of
    # the predicted position and dropping 0.01 per step, so consecutive steps
    # are visually separated.  (The previous xrange call with a float start
    # and step raised TypeError and could not match len(path).)
    for (_, path), up_amplitude in zip(p_hits, amp_list):
        trail = up_amplitude - 0.01 * np.arange(len(path))
        plt.plot(path, trail, 'r', alpha=0.43)

    # Plot failed test
    fail_results = ecg.loadAnnot(record_name, target_label='P')
    # Annotation positions are halved to match the downsampled signal.
    pos_list = [int(x[0] / 2) for x in fail_results if x[1] == 'P']
    amp_list = [raw_sig[x] for x in pos_list]
    plt.plot(pos_list,
             amp_list,
             'x',
             markersize=15,
             markeredgewidth=5,
             alpha=0.5,
             label='failed')

    plt.title(record_name)
    plt.grid(True)
    plt.legend()
    plt.show(block=False)
    pdb.set_trace()