# Example #1
# 0
def TrainingModels(target_label, model_file_name, training_list):
    '''Train a RandomWalker on the given QT records and save the model.

    Inputs:
        target_label: label handed to RandomWalker as its target_label.
        model_file_name: path passed to walker.save_model(). The random
                         pattern file is 'random_pattern.json' in the same
                         folder as this path.
        training_list: QT record names; for each one the 'sig' lead and the
                       record's expert labels are collected as training data.
    '''
    qt = QTloader()

    pattern_file_name = os.path.join(os.path.dirname(model_file_name),
                                     'random_pattern.json')
    walker = RandomWalker(target_label=target_label,
                          random_forest_config=dict(max_depth=10),
                          random_pattern_file_name=pattern_file_name)

    # The reported training time covers feature collection plus fitting.
    start_time = time.time()
    for record_name in training_list:
        print('Collecting features from record %s.' % record_name)
        sig = qt.load(record_name)
        walker.collect_training_data(sig['sig'], qt.getExpert(record_name))
    print('random forest start training(%s)...' % target_label)
    walker.training()
    print('trianing used %.3f seconds' % (time.time() - start_time))

    # No import happens between training and saving, so a missing optional
    # package cannot abort the function after the expensive training step.
    start_time = time.time()
    walker.save_model(model_file_name)
    print('Serializing model time cost %f' % (time.time() - start_time))
# Example #2
# 0
def Test1():
    '''Compare DPI QRS detections with the expert R labels of every QT record.

    For each record that has expert 'R' labels, the positions returned by
    GetFN(expert R positions, detected QRS positions) are plotted on top of
    the signal together with all detected QRS positions. Records for which
    GetFN returns nothing are not plotted.
    '''
    loader = QTloader()
    record_names = loader.getreclist()

    for rec_ind, record_name in enumerate(record_names):
        print('Processing record[%d] %s ...' % (rec_ind, record_name))
        record = loader.load(record_name)
        ecg = record['sig']
        annotations = loader.getExpert(record_name)
        expert_r_positions = [mark[0] for mark in annotations
                              if mark[1] == 'R']

        # Nothing to compare against when the expert marked no R peak.
        if not expert_r_positions:
            continue

        detector = DPI()
        detected_qrs = detector.QRS_Detection(ecg)

        # Expert positions reported by GetFN (presumably the false
        # negatives of the detector -- confirm against GetFN).
        missed_positions = GetFN(expert_r_positions, detected_qrs)

        # len() rather than truthiness: GetFN's return type is not visible
        # here and may not support bool().
        if len(missed_positions) > 0:
            plt.plot(ecg)
            detected_amplitudes = [ecg[x] for x in detected_qrs]
            plt.plot(detected_qrs, detected_amplitudes, 'ro', markersize=12)
            missed_amplitudes = [ecg[x] for x in missed_positions]
            plt.plot(missed_positions, missed_amplitudes, 'ys', markersize=14)
            plt.show()
# Example #3
# 0
def TestQT(record_name, save_result_folder, model_folder, random_pattern_file_name):
    '''Run TestSignal on both leads of a QT record and save the results.

    The tested range spans from 100 samples before the first expert
    annotation to 100 samples after the last one. The first element of every
    result returned by TestSignal is shifted by the start of that range
    before saving.

    Inputs:
        record_name: QT record to load.
        save_result_folder: folder receiving '<record_name>.json'.
        model_folder, random_pattern_file_name: forwarded to TestSignal.
    '''
    sampling_rate = 250.0
    loader = QTloader()

    record = loader.load(record_name)
    annotations = loader.getExpert(record_name)
    positions, _labels = zip(*annotations)
    test_range = [np.min(positions) - 100, np.max(positions) + 100]

    per_lead_results = list()

    # (console title, key of the lead in the record, name stored in the JSON)
    leads = (
        ('Lead1', 'sig', record_name),
        ('Lead2', 'sig2', record_name + '_sig2'),
    )
    for title, signal_key, result_name in leads:
        print(title)
        lead_signal = record[signal_key]
        detections = TestSignal(lead_signal, sampling_rate, test_range, model_folder, random_pattern_file_name)
        # Shift each result in place by the start of the tested range.
        for idx, detection in enumerate(detections):
            detections[idx] = [detection[0] + test_range[0], detection[1]]
        per_lead_results.append((result_name, detections))

    result_file_name = os.path.join(save_result_folder, '%s.json' % record_name)
    with open(result_file_name, 'w') as fout:
        json.dump(per_lead_results, fout, indent = 4)
        print('Results saved as %s.' % result_file_name)
def TEST_ExpertQRS():
    '''Plot record sel103 with one SWT detail band, in two stacked panels.

    Top panel: the signal handed to SWT_NoPredictQRS plus swt.cDlist[-4].
    Bottom panel: the freshly reloaded signal, cropped by
    swt.crop_data_for_swt, plus the matching detail band of a 'db6',
    9-level pywt.swt.
    '''
    record_name = 'sel103'
    loader = QTloader()
    ecg = loader.load(record_name)['sig']
    expert_marks = loader.getExpert(record_name)

    swt = SWT_NoPredictQRS(ecg, expert_marks)
    swt.swt()

    def draw_panel(subplot_code, signal, detail_band):
        '''Draw a signal and its detail coefficients in one subplot.'''
        plt.subplot(subplot_code)
        plt.plot(signal)
        plt.plot(detail_band)
        plt.grid(True)

    # Detail coefficients computed by the SWT_NoPredictQRS object.
    processed_band = swt.cDlist[-4]

    plt.figure(1)
    # plot Non QRS ECG & SWT
    draw_panel(211, ecg, processed_band)

    # plot Original ECG
    original = swt.QTdb.load(record_name)['sig']
    original = swt.crop_data_for_swt(original)
    coefficients = pywt.swt(original, 'db6', 9)
    _approximations, details = zip(*coefficients)
    original_band = details[-4]

    draw_panel(212, original, original_band)
    plt.show()
def TrainingModels(target_label, model_file_name, training_list,
                   cp_marks_root='/home/alex/code/Python/EcgCharacterPointMarks'):
    '''Train a RandomWalker from expert plus manual CP marks and save it.

    CP: Characteristic points

    Inputs:
        target_label: label to train for; also the sub-folder of
                      cp_marks_root holding the manual mark files.
        model_file_name: path passed to walker.save_model(). The random
                         pattern file is 'random_pattern.json' in the same
                         folder as this path.
        training_list: QT record names to collect training data from.
        cp_marks_root: folder containing
                       '<target_label>/<record_name>_poslist.json' files,
                       each a JSON object with a 'poslist' list of positions.
                       Defaults to the previously hard-coded location.

    A record is skipped when it has no expert mark with target_label, or when
    its manual mark file exists but its 'poslist' is empty.
    '''
    qt = QTloader()

    pattern_file_name = os.path.join(os.path.dirname(model_file_name),
                                     'random_pattern.json')
    walker = RandomWalker(target_label=target_label,
                          random_forest_config=dict(max_depth=10),
                          random_pattern_file_name=pattern_file_name)

    # The reported training time covers feature collection plus fitting.
    start_time = time.time()
    for record_name in training_list:
        # Add expert marks
        expert_marks = qt.getExpert(record_name)
        CP_marks = [x for x in expert_marks if x[1] == target_label]
        if not CP_marks:
            continue

        # Add manual labels if possible
        CP_file_name = os.path.join(cp_marks_root, target_label,
                                    '%s_poslist.json' % record_name)
        if os.path.exists(CP_file_name):
            with open(CP_file_name, 'r') as fin:
                poslist = json.load(fin)['poslist']
            # An existing but empty manual list drops the whole record,
            # expert marks included (behavior kept from the original code).
            if len(poslist) == 0:
                continue
            CP_marks.extend((pos, target_label) for pos in poslist)

        print('Collecting features from record %s.' % record_name)
        sig = qt.load(record_name)
        walker.collect_training_data(sig['sig'], CP_marks)
    print('random forest start training(%s)...' % target_label)
    walker.training()
    print('trianing used %.3f seconds' % (time.time() - start_time))

    # No import happens between training and saving, so a missing optional
    # package cannot abort the function after the expensive training step.
    start_time = time.time()
    walker.save_model(model_file_name)
    print('Serializing model time cost %f' % (time.time() - start_time))
def ContinueAddQtTrainingSamples(walker, target_label):
    '''Add QT training samples.

    For every record listed by QTloader, the expert marks whose label equals
    target_label are passed, together with the record's 'sig' lead, to
    walker.collect_training_data(). Records without such marks are skipped.

    Inputs:
        walker: object providing collect_training_data(signal, marks).
        target_label: label of the expert marks to collect.
    '''
    qt = QTloader()

    for record_name in qt.getreclist():
        # Add expert marks
        expert_marks = qt.getExpert(record_name)
        CP_marks = [x for x in expert_marks if x[1] == target_label]
        if not CP_marks:
            continue

        print('Collecting features from QT record %s.' % record_name)
        sig = qt.load(record_name)
        walker.collect_training_data(sig['sig'], CP_marks)
    def TrainQtRecords(self, record_list):
        '''API for QTdb: collect features from record_list, then train.

        Each record's 'sig' lead and expert labels are handed to
        self.AddNewTrainingSignal(); self.training() runs once all records
        have been processed. Progress goes to the log and to stdout.
        '''
        loader = QTloader()

        # Extracting feature from each record (progress counter is 1-based).
        for progress, record_name in enumerate(record_list, 1):
            record = loader.load(record_name)
            self.AddNewTrainingSignal(record['sig'],
                                      loader.getExpert(record_name))
            # Logging
            log.info('Extracted features from %s' % record_name)
            print('%s (%d/%d)' % ('.' * progress, progress,
                                  len(record_list)))

        # Training with feature pool
        self.training()
def TrainingModels(target_label, model_file_name, training_list,
                   tonset_result_folder='/home/alex/code/Python/Tonset/results'):
    '''Train a RandomWalker on Tonset marks and save the model.

    Inputs:
        target_label: label handed to RandomWalker as its target_label.
        model_file_name: path passed to walker.save_model(). The random
                         pattern file is 'random_pattern.json' in the same
                         folder as this path.
        training_list: QT record names to collect training data from.
        tonset_result_folder: folder containing '<record_name>_poslist.json'
                              files, each a JSON object with a 'poslist' list
                              of positions. Defaults to the previously
                              hard-coded location.

    Per record, a manual position file replaces the expert marks when it
    exists; otherwise the expert marks labelled 'Tonset' are used. A record
    with an empty list in either source is skipped.

    NOTE(review): the marks are always labelled 'Tonset', whatever
    target_label is -- confirm callers only pass 'Tonset'.
    '''
    qt = QTloader()

    pattern_file_name = os.path.join(os.path.dirname(model_file_name),
                                     'random_pattern.json')
    walker = RandomWalker(target_label=target_label,
                          random_forest_config=dict(max_depth=10),
                          random_pattern_file_name=pattern_file_name)

    # The reported training time covers feature collection plus fitting.
    start_time = time.time()
    for record_name in training_list:
        Tonset_file_name = os.path.join(tonset_result_folder,
                                        '%s_poslist.json' % record_name)
        if os.path.exists(Tonset_file_name):
            with open(Tonset_file_name, 'r') as fin:
                poslist = json.load(fin)['poslist']
            if len(poslist) == 0:
                continue
            Tonset_marks = [(pos, 'Tonset') for pos in poslist]
        else:
            expert_marks = qt.getExpert(record_name)
            Tonset_marks = [x for x in expert_marks if x[1] == 'Tonset']
            if not Tonset_marks:
                continue

        print('Collecting features from record %s.' % record_name)
        sig = qt.load(record_name)
        walker.collect_training_data(sig['sig'], Tonset_marks)
    print('random forest start training(%s)...' % target_label)
    walker.training()
    print('trianing used %.3f seconds' % (time.time() - start_time))

    # No import happens between training and saving, so a missing optional
    # package cannot abort the function after the expensive training step.
    start_time = time.time()
    walker.save_model(model_file_name)
    print('Serializing model time cost %f' % (time.time() - start_time))
# Example #9
# 0
    def TestQtRecords(self, save_folder, reclist=None):
        '''API for QTdb: testing given record_list.

        Inputs:
            save_folder: folder receiving one 'result_<record_name>' JSON
                         file per record.
            reclist: QT record names to test; None (the default) means no
                     records.

        Each saved file holds a list with one [name, results] entry per
        lead: '<record_name>' for lead 'sig' and '<record_name>_sig2' for
        lead 'sig2'. results pairs every position returned by
        self.testing() with self.target_label.
        '''
        # None instead of a shared mutable [] default.
        if reclist is None:
            reclist = []
        QTdb = QTloader()

        print('Testing:')
        for testing_count, record_name in enumerate(reclist, 1):
            # Logging
            log.info('Testing record %s' % record_name)
            print('%s (%d/%d)' % ('.' * testing_count, testing_count,
                                  len(reclist)))

            sig_struct = QTdb.load(record_name)
            expert_labels = QTdb.getExpert(record_name)

            # (key of the lead in the record, name stored in the result file)
            leads = (
                ('sig', record_name),
                ('sig2', record_name + '_sig2'),
            )
            lead_result_list = []
            for signal_key, result_name in leads:
                predict_position_list = self.testing(sig_struct[signal_key],
                                                     expert_labels)
                test_result = [(position, self.target_label)
                               for position in predict_position_list]
                lead_result_list.append([result_name, test_result])

            # Save result.
            result_file_name = os.path.join(save_folder,
                                            'result_{}'.format(record_name))
            with open(result_file_name, 'w') as fout:
                json.dump(lead_result_list, fout, indent=4)
def Test1(target_label='P', num_training=25):
    '''Test case 1: random walk.

    Trains a RandomWalker on num_training randomly sampled QT records, then
    for every remaining record runs 20 walks from increasing seed positions,
    plotting each walk path and its predicted position. Execution stops in
    the debugger (pdb.set_trace) after every walk.
    '''
    loader = QTloader()
    all_records = loader.getreclist()
    training_list = random.sample(all_records, num_training)
    testing_list = list(set(all_records) - set(training_list))

    walker = RandomWalker(target_label=target_label,
                          random_forest_config=dict(max_depth=10))

    start_time = time.time()
    for record_name in training_list:
        print('Collecting features from record %s.' % record_name)
        record = loader.load(record_name)
        walker.collect_training_data(record['sig'],
                                     loader.getExpert(record_name))
    print('random forest start training...')
    walker.training()
    print('trianing used %.3f seconds' % (time.time() - start_time))

    for record_name in testing_list:
        record = loader.load(record_name)
        raw_sig = record['sig']

        seed_position = random.randint(100, len(raw_sig) - 100)
        plt.figure(1)
        plt.clf()
        plt.plot(raw_sig, label=record_name)
        plt.title(target_label)
        for _ in range(20):
            # Each walk starts 1..200 samples after the previous seed.
            seed_position += random.randint(1, 200)
            print('testing...(position: %d)' % seed_position)
            start_time = time.time()
            walk = walker.testing_walk(raw_sig,
                                       seed_position,
                                       iterations=100,
                                       stepsize=10)
            elapsed = time.time() - start_time
            print('testing finished in %.3f seconds.' % elapsed)

            pos_list, _values = zip(*walk)
            # The prediction is the mean of the second half of the path.
            second_half = pos_list[len(pos_list) // 2:]
            predict_pos = np.mean(second_half)

            # Draw the path as a line sloping down from the amplitude at the
            # first position, dropping 0.01 per step.
            path_heights = []
            height = raw_sig[pos_list[0]]
            for _pos in pos_list:
                path_heights.append(height)
                height -= 0.01

            plt.plot(predict_pos,
                     raw_sig[int(predict_pos)],
                     'ro',
                     markersize=14,
                     label='predict position')
            plt.plot(pos_list,
                     path_heights,
                     'r',
                     label='walk path',
                     markersize=3,
                     linewidth=8,
                     alpha=0.3)
            plt.xlim(min(pos_list) - 100, max(pos_list) + 100)
            plt.grid(True)
            plt.legend()
            plt.show(block=False)
            pdb.set_trace()
class HogFeatureExtractor(object):
    '''Regress the offset of a target label from an R peak using 1-D HOG.

    For every expert 'R' label a fixed-length signal window is cut (ending at
    the R peak, or starting at it when the target label contains 'T'). HOG
    features of that window at several difference steps form the feature
    vector, and the regression target is the signed distance, in samples,
    from the R peak to the nearest target label.
    '''

    # Difference steps whose HOG features are concatenated into one vector.
    _DIFF_STEPS = (1, 4, 8)

    def __init__(self, target_label='P'):
        '''Hog 1D feature extractor.
        Inputs:
            target_label: label to detect. eg. 'T[(onset)|(offset)]{0,1}', 'P'
        '''
        self.qt = QTloader()

        # Feature length
        self.fixed_window_length = 250

        # Training Samples.
        self.signal_segments = []
        self.training_vector = []
        self.target_biases = []

        self.target_label = target_label

        self.hog = HogClass(segment_len=20)

        # ML models
        self.gbdt = None

    def GetDiffFeature(self, signal_segment, diff_step=4):
        '''Get Difference feature.
        Inputs:
            signal_segment: signal window passed to self.hog.ComputeHog.
            diff_step: forwarded to self.hog.ComputeHog.
        Returns:
            1-D array: all vectors returned by ComputeHog, flattened and
            concatenated in order (an empty float array when there are none).
        '''
        hog_arr = self.hog.ComputeHog(signal_segment,
                                      diff_step=diff_step,
                                      debug_plot=False)
        # The leading empty float array gives the same result as repeated
        # np.append onto np.array([]): float dtype and a valid empty result.
        pieces = [np.array([])]
        pieces.extend(np.ravel(hog_vec) for hog_vec in hog_arr)
        return np.concatenate(pieces)

    def _BuildFeatureVector(self, signal_segment):
        '''Concatenate the HOG features of signal_segment at all diff steps.'''
        pieces = [np.array([])]
        pieces.extend(
            self.GetDiffFeature(signal_segment, diff_step=diff_step)
            for diff_step in self._DIFF_STEPS)
        return np.concatenate(pieces)

    def GetTrainingSamples(self, sig_in, expert_labels):
        '''Form Hog1D feature.
        Inputs:
            sig_in: input ECG signal.
            expert_labels: list of (pos, label); sorted IN PLACE by position.

        For every 'R' label with a valid target bias, appends the cut
        segment to self.signal_segments, the bias to self.target_biases and
        the feature vector to self.training_vector.
        '''
        # Make sure the x indexes are in ascending order.
        expert_labels.sort(key=lambda x: x[0])

        for expert_index, (_, label) in enumerate(expert_labels):
            if label != 'R':
                continue

            # Cut out the ECG segment anchored at the current R peak.
            signal_segment, target_bias = self.CutSegment(
                sig_in,
                expert_labels,
                expert_index,
                fixed_window_length=self.fixed_window_length)
            # Skip invalid values
            if target_bias is None:
                continue
            self.signal_segments.append(signal_segment)
            self.target_biases.append(target_bias)
            self.training_vector.append(
                self._BuildFeatureVector(signal_segment))

    def Train(self, reclist):
        '''Training with Qt data.
        Inputs:
            reclist: QT record names; lead 'sig' and the expert labels of
                     each record are used.
        Raises:
            Exception: if any collected feature value is not a float.
        '''
        for rec_name in reclist:
            sig_struct = self.qt.load(rec_name)
            raw_signal = sig_struct['sig']

            # Expert samples from Qt database
            expert_labels = self.qt.getExpert(rec_name)

            # Collect training vectors
            self.GetTrainingSamples(raw_signal, expert_labels)

        # Sanity check: every feature value must be a float.
        for vec in self.training_vector:
            for val in vec:
                if not isinstance(val, float):
                    raise Exception('val = {}'.format(val))
        # Training GBDT models
        # NOTE(review): loss='ls' is the least-squares loss name accepted by
        # older scikit-learn releases -- confirm against the installed
        # version before upgrading.
        self.gbdt = GradientBoostingRegressor(n_estimators=100,
                                              learning_rate=0.1,
                                              max_depth=1,
                                              random_state=0,
                                              loss='ls').fit(
                                                  self.training_vector,
                                                  self.target_biases)

    def LoadModel(self, model_object):
        '''Load Model object.'''
        self.gbdt = model_object

    def Testing(self, sig_in, expert_labels):
        '''Testing given ECG.
        Inputs:
            sig_in: input ECG signal.
            expert_labels: list of (pos, label); only 'R' entries are used
                           as anchors.
        Returns:
            List with one entry per 'R' label: the value returned by
            self.gbdt.predict() (an array for scikit-learn models) plus the
            R position, i.e. the predicted global position.
        '''
        detected_positions = list()
        for expert_index, (pos, label) in enumerate(expert_labels):
            if label != 'R':
                continue

            # Cut out the ECG segment anchored at the current R peak.
            signal_segment, _ = self.CutSegment(
                sig_in,
                expert_labels,
                expert_index,
                fixed_window_length=self.fixed_window_length)

            # predict() expects a 2-D (n_samples, n_features) input.
            current_feature_vector = self._BuildFeatureVector(
                signal_segment).reshape(1, -1)
            predict_pos = self.gbdt.predict(current_feature_vector)

            # Append the global position
            detected_positions.append(predict_pos + pos)

        return detected_positions

    def TestingQt(self, record_name):
        '''Plot the predictions for the first 7 R peaks of a QT record.

        Each beat is shown in its own blocking plt.show() window with the
        predicted position marked on the cut segment.
        '''
        sig_struct = self.qt.load(record_name)
        sig_in = sig_struct['sig']
        expert_labels = self.qt.getExpert(record_name)

        # debug: only the first 7 R peaks are displayed.
        debug_count = 7
        for expert_index, (_, label) in enumerate(expert_labels):
            if label != 'R':
                continue

            debug_count -= 1
            if debug_count < 0:
                break
            # Cut out the ECG segment anchored at the current R peak.
            signal_segment, _ = self.CutSegment(
                sig_in,
                expert_labels,
                expert_index,
                fixed_window_length=self.fixed_window_length)

            # Same 2-D input shape as in Testing(); predict() expects
            # (n_samples, n_features).
            current_feature_vector = self._BuildFeatureVector(
                signal_segment).reshape(1, -1)
            predict_pos = self.gbdt.predict(current_feature_vector)

            print('Predict position: %s' % (predict_pos,))

            # Display results. The R peak is the last sample of the segment,
            # so the bias is taken relative to index window_length - 1.
            # NOTE(review): for 'T' targets the segment starts at the R
            # peak, so this index looks off for them -- confirm.
            local_pos = predict_pos + self.fixed_window_length - 1
            local_pos = int(local_pos)
            plt.plot(signal_segment)
            plt.plot(local_pos,
                     signal_segment[local_pos],
                     marker='o',
                     markersize=12)
            plt.grid(True)
            plt.title(record_name)
            plt.show()

    def CutSegment_T(self,
                     sig_in,
                     expert_labels,
                     expert_index,
                     fixed_window_length=250 * 1):
        '''Get equal length signal_segments starts at expert_index.
        Inputs:
            sig_in: Input ECG signal.
            expert_labels: Annotation list of form [(pos, label), ...]
            expert_index: The index of the element in expert_labels that
                          has label 'R'.
            fixed_window_length : return signal's length
        Returns:
            signal_segment: Cropped signal segment, zero-padded at the end
                            when the signal is shorter than the window.
            target_bias: (May be None)The bias respect to the expert_index's
                         position.
        '''
        current_R_pos = expert_labels[expert_index][0]
        ecg_segment = np.zeros(fixed_window_length)
        right_bound = min(current_R_pos + fixed_window_length - 1,
                          len(sig_in) - 1)
        len_ecg_data = abs(current_R_pos - right_bound) + 1
        ecg_segment[:len_ecg_data] = np.array(
            sig_in[current_R_pos:current_R_pos + len_ecg_data])

        # Scan the labels after the current R peak. The scan stops at the
        # second following 'R', at the second target label, or once a label
        # lies a full window away from the current R peak.
        seen_following_R = False
        next_T_pos = None
        ind = expert_index + 1
        while ind < len(expert_labels):
            cur_pos, cur_label = expert_labels[ind]
            ind += 1
            if cur_label == 'R':
                if seen_following_R:
                    break
                seen_following_R = True
            if cur_label == self.target_label:
                if next_T_pos is not None:
                    break
                next_T_pos = cur_pos
            if abs(current_R_pos - cur_pos) >= fixed_window_length:
                break

        # A target outside the window cannot be located in the segment.
        if (next_T_pos is None or
                abs(current_R_pos - next_T_pos) >= fixed_window_length):
            return ecg_segment, None

        # Bias respect to current_R_pos
        return ecg_segment, next_T_pos - current_R_pos

    def CutSegment(self,
                   sig_in,
                   expert_labels,
                   expert_index,
                   fixed_window_length=250 * 1):
        '''Get equal length signal_segments starts or ends at expert_index.
        Inputs:
            sig_in: Input ECG signal.
            expert_labels: Annotation list of form [(pos, label), ...]
            expert_index: The index of the element in expert_labels that
                          has label 'R'.
            fixed_window_length : return signal's length
        Returns:
            signal_segment: Cropped signal segment. It ends at the R peak
                            (zero-padded at the front near the signal start)
                            unless the target label contains 'T', in which
                            case CutSegment_T is used.
            target_bias: (May be None)The bias respect to the expert_index's
                         position.
        '''
        # Search T wave
        if 'T' in self.target_label:
            return self.CutSegment_T(sig_in,
                                     expert_labels,
                                     expert_index,
                                     fixed_window_length=fixed_window_length)

        current_R_pos = expert_labels[expert_index][0]
        ecg_segment = np.zeros(fixed_window_length)
        left_bound = max(0, current_R_pos - fixed_window_length + 1)
        len_ecg_data = current_R_pos - left_bound + 1
        ecg_segment[fixed_window_length - len_ecg_data:] = np.array(
            sig_in[left_bound:current_R_pos + 1])

        # Walk backwards to the closest earlier target label; nothing
        # further back is needed, so stop at the first match.
        previous_P_pos = None
        ind = expert_index - 1
        while ind >= 0:
            cur_pos, cur_label = expert_labels[ind]
            if cur_label == self.target_label:
                previous_P_pos = cur_pos
                break
            ind -= 1

        # A target outside the window cannot be located in the segment.
        if (previous_P_pos is None or
                current_R_pos - previous_P_pos >= fixed_window_length):
            return ecg_segment, None

        # Bias respect to current_R_pos
        return ecg_segment, previous_P_pos - current_R_pos
# Example #12
# 0
def RoundTest(target_label, result_folder, num_training = 75):
    '''Train on a fixed core of records plus random extras, test the rest.

    The records in must_train_list are always trained on. When num_training
    exceeds their number, the remainder is sampled at random from the other
    QT records. Every record not used for training is tested on both leads
    and the results are written to '<result_folder>/<record_name>.json'.
    '''
    qt = QTloader()
    all_records = qt.getreclist()
    must_train_list = [
        "sel35", "sel36", "sel31", "sel38", "sel39", "sel820", "sel51",
        "sele0104", "sele0107", "sel223", "sele0607", "sel102", "sele0409",
        "sel41", "sel40", "sel43", "sel42", "sel45", "sel48", "sele0133",
        "sele0116", "sel14172", "sele0111", "sel213", "sel14157", "sel301",
    ]
    # Number of additional records still to be drawn at random.
    num_extra = num_training - len(must_train_list)
    candidate_records = list(set(all_records) - set(must_train_list))
    training_list = must_train_list
    if num_extra > 0:
        training_list.extend(random.sample(candidate_records, num_extra))
    testing_list = list(set(candidate_records) - set(training_list))

    walker = RandomWalker(target_label = target_label,
                          random_forest_config = dict(max_depth = 10))

    start_time = time.time()
    for record_name in training_list:
        print('Collecting features from record %s.' % record_name)
        record = qt.load(record_name)
        walker.collect_training_data(record['sig'],
                                     qt.getExpert(record_name))
    print('random forest start training...')
    walker.training()
    print('trianing used %.3f seconds' % (time.time() - start_time))

    for record_name in testing_list:
        print('testing record %s...' % record_name)
        record = qt.load(record_name)
        # (key of the lead in the record, name stored in the JSON file)
        leads = (
            ('sig', record_name),
            ('sig2', record_name + '_sig2'),
        )
        record_result = list()
        for signal_key, result_name in leads:
            lead_signal = record[signal_key]
            record_result.append((result_name, testing(walker, lead_signal)))
        # Write to json
        result_file_name = os.path.join(result_folder, '%s.json' % record_name)
        with open(result_file_name, 'w') as fout:
            json.dump(record_result, fout, indent = 4)