示例#1
0
def GetModels(model_folder, pattern_file_name):
    '''Returns model dict.'''
    # label_list = ['P', 'Ponset', 'Poffset',
    # 'T', 'Toffset', 'Tonset']
    label_list = [
        'P', 'Ponset', 'Poffset', 'T', 'Toffset', 'Tonset', 'Ronset', 'Roffset'
    ]
    # label_list = ['P', 'Ponset', 'Poffset']
    # label_list = ['P', ]
    bias_list = [
        -0.19,
        -0.195,
        -0.185,
        0.26,
        0.27,
        0.1,
        -0.02,
        0.02,
    ]
    # Get model dict
    models = list()
    for target_label, bias in zip(label_list, bias_list):
        model_file_name = os.path.join(model_folder, target_label + '.mdl')
        walker_model = RandomWalker(target_label=target_label,
                                    random_pattern_file_name=pattern_file_name)
        walker_model.load_model(model_file_name)
        models.append((walker_model, bias, target_label))
    return models
示例#2
0
def TrainingModels(target_label, model_file_name, training_list):
    '''Randomly select num_training records to train, and test others.'''
    qt = QTloader()
    record_list = qt.getreclist()
    testing_list = list(set(record_list) - set(training_list))

    random_forest_config = dict(max_depth=10)
    walker = RandomWalker(target_label=target_label,
                          random_forest_config=random_forest_config,
                          random_pattern_file_name=os.path.join(
                              os.path.dirname(model_file_name),
                              'random_pattern.json'))

    start_time = time.time()
    for record_name in training_list:
        print 'Collecting features from record %s.' % record_name
        sig = qt.load(record_name)
        walker.collect_training_data(sig['sig'], qt.getExpert(record_name))
    print 'random forest start training(%s)...' % target_label
    walker.training()
    print 'trianing used %.3f seconds' % (time.time() - start_time)

    import joblib
    start_time = time.time()
    walker.save_model(model_file_name)
    print 'Serializing model time cost %f' % (time.time() - start_time)
示例#3
0
    def estimates(self, num_estimates, start_node=None):
        """
        Generate estimates of the property.
        :param num_estimates: Maximum number of estimates to provide.
        :param start_node: (Optional) Custom starting node.
        :return: Yields estimates (Python generator style)
        """
        rw = RandomWalker(self.G, self._edge_weight_func, self._accum_func)

        if start_node is None:
            start_node = self.compute_start_node()

        prev_t = 0
        for k, t, accum in rw.return_times(start_node, num_estimates):
            met = self._compute_metric(start_node, k, t, accum)
            yield k - 1, met, t - prev_t
            prev_t = t
def TrainingModels(target_label, model_file_name, training_list):
    '''Randomly select num_training records to train, and test others.
    CP: Characteristic points
    '''
    qt = QTloader()
    record_list = qt.getreclist()
    testing_list = list(set(record_list) - set(training_list))

    random_forest_config = dict(max_depth=10)
    walker = RandomWalker(target_label=target_label,
                          random_forest_config=random_forest_config,
                          random_pattern_file_name=os.path.join(
                              os.path.dirname(model_file_name),
                              'random_pattern.json'))

    start_time = time.time()
    for record_name in training_list:
        CP_file_name = os.path.join(
            '/home/alex/code/Python/EcgCharacterPointMarks', target_label,
            '%s_poslist.json' % record_name)

        # Add expert marks
        expert_marks = qt.getExpert(record_name)
        CP_marks = [x for x in expert_marks if x[1] == target_label]
        if len(CP_marks) == 0:
            continue

        # Add manual labels if possible
        if os.path.exists(CP_file_name) == True:
            with open(CP_file_name, 'r') as fin:
                CP_info = json.load(fin)
                poslist = CP_info['poslist']
                if len(poslist) == 0:
                    continue
                CP_marks.extend(zip(poslist, [
                    target_label,
                ] * len(poslist)))

        print 'Collecting features from record %s.' % record_name
        sig = qt.load(record_name)
        walker.collect_training_data(sig['sig'], CP_marks)
    print 'random forest start training(%s)...' % target_label
    walker.training()
    print 'trianing used %.3f seconds' % (time.time() - start_time)

    import joblib
    start_time = time.time()
    walker.save_model(model_file_name)
    print 'Serializing model time cost %f' % (time.time() - start_time)
def TrainingModels_Changgeng(target_label, model_file_name):
    '''Randomly select num_training records to train, and test others.
    CP: Characteristic points
    '''
    from changgengLoader import ECGLoader
    ecg = ECGLoader(500, current_folderpath)

    random_forest_config = dict(max_depth=10)
    walker = RandomWalker(target_label=target_label,
                          random_forest_config=random_forest_config,
                          random_pattern_file_name=os.path.join(
                              os.path.dirname(model_file_name),
                              'random_pattern.json'))

    start_time = time.time()

    for record_ind in xrange(0, len(ecg.P_faillist)):
        record_name = ecg.P_faillist[record_ind]
        CP_file_name = os.path.join(current_folderpath, 'data', 'labels',
                                    target_label,
                                    '%s_poslist.json' % record_name)

        CP_marks = []

        # Add manual labels if possible
        if os.path.exists(CP_file_name) == True:
            with open(CP_file_name, 'r') as fin:
                print 'Collecting features from record %s.' % record_name
                CP_info = json.load(fin)
                poslist = CP_info['poslist']
                if len(poslist) == 0:
                    continue
                CP_marks.extend(zip(poslist, [
                    target_label,
                ] * len(poslist)))

        sig = ecg.load(record_ind)

        walker.collect_training_data(sig[0], CP_marks)
    print 'random forest start training(%s)...' % target_label
    walker.training()
    print 'trianing used %.3f seconds' % (time.time() - start_time)

    import joblib
    start_time = time.time()
    walker.save_model(model_file_name)
    print 'Serializing model time cost %f' % (time.time() - start_time)
def TrainingModels(target_label, model_file_name, training_list):
    '''Randomly select num_training records to train, and test others.'''
    qt = QTloader()
    record_list = qt.getreclist()
    testing_list = list(set(record_list) - set(training_list))

    random_forest_config = dict(max_depth=10)
    walker = RandomWalker(target_label=target_label,
                          random_forest_config=random_forest_config,
                          random_pattern_file_name=os.path.join(
                              os.path.dirname(model_file_name),
                              'random_pattern.json'))

    start_time = time.time()
    for record_name in training_list:
        Tonset_file_name = os.path.join(
            '/home/alex/code/Python/Tonset/results',
            '%s_poslist.json' % record_name)
        if os.path.exists(Tonset_file_name) == True:
            with open(Tonset_file_name, 'r') as fin:
                Tonset_info = json.load(fin)
                poslist = Tonset_info['poslist']
                if len(poslist) == 0:
                    continue
                Tonset_marks = zip(poslist, [
                    'Tonset',
                ] * len(poslist))
        else:
            expert_marks = qt.getExpert(record_name)
            Tonset_marks = [x for x in expert_marks if x[1] == 'Tonset']
            if len(Tonset_marks) == 0:
                continue

        print 'Collecting features from record %s.' % record_name
        sig = qt.load(record_name)
        walker.collect_training_data(sig['sig'], Tonset_marks)
    print 'random forest start training(%s)...' % target_label
    walker.training()
    print 'trianing used %.3f seconds' % (time.time() - start_time)

    import joblib
    start_time = time.time()
    walker.save_model(model_file_name)
    print 'Serializing model time cost %f' % (time.time() - start_time)
def Test1(target_label='P', num_training=25):
    '''Test case 1: random walk.'''
    qt = QTloader()
    record_list = qt.getreclist()
    training_list = random.sample(record_list, num_training)
    testing_list = list(set(record_list) - set(training_list))

    random_forest_config = dict(max_depth=10)
    walker = RandomWalker(target_label=target_label,
                          random_forest_config=random_forest_config)

    start_time = time.time()
    for record_name in training_list:
        print 'Collecting features from record %s.' % record_name
        sig = qt.load(record_name)
        walker.collect_training_data(sig['sig'], qt.getExpert(record_name))
    print 'random forest start training...'
    walker.training()
    print 'trianing used %.3f seconds' % (time.time() - start_time)

    for record_name in testing_list:
        sig = qt.load(record_name)
        raw_sig = sig['sig']

        seed_position = random.randint(100, len(raw_sig) - 100)
        plt.figure(1)
        plt.clf()
        plt.plot(sig['sig'], label=record_name)
        plt.title(target_label)
        for ti in xrange(0, 20):
            seed_position += random.randint(1, 200)
            print 'testing...(position: %d)' % seed_position
            start_time = time.time()
            results = walker.testing_walk(sig['sig'],
                                          seed_position,
                                          iterations=100,
                                          stepsize=10)
            print 'testing finished in %.3f seconds.' % (time.time() -
                                                         start_time)

            pos_list, values = zip(*results)
            predict_pos = np.mean(pos_list[len(pos_list) / 2:])

            # amp_list = [raw_sig[int(x)] for x in pos_list]
            amp_list = []
            bias = raw_sig[pos_list[0]]
            for pos in pos_list:
                amp_list.append(bias)
                bias -= 0.01

            plt.plot(predict_pos,
                     raw_sig[int(predict_pos)],
                     'ro',
                     markersize=14,
                     label='predict position')
            plt.plot(pos_list,
                     amp_list,
                     'r',
                     label='walk path',
                     markersize=3,
                     linewidth=8,
                     alpha=0.3)
            plt.xlim(min(pos_list) - 100, max(pos_list) + 100)
            plt.grid(True)
            plt.legend()
            plt.show(block=False)
            pdb.set_trace()
def TrainingModels_Changgeng(target_label, model_file_name):
    '''Randomly select num_training records to train, and test others.
    CP: Characteristic points
    '''

    import glob
    annot_jsonIDs = glob.glob(
        os.path.join(current_folderpath, 'data', 'labels', target_label,
                     '*.json'))
    annot_jsonIDs = [os.path.split(x)[-1] for x in annot_jsonIDs]
    annot_jsonIDs = [x.split('.')[0] for x in annot_jsonIDs]
    # skip failed records
    faillist = [
        8999, 8374, 6659, 6655, 6059, 5395, 1401, 1269, 737, 75, 9524, 9476
    ]
    faillist = [str(x) for x in faillist]
    annot_jsonIDs = list(set(annot_jsonIDs) - set(faillist))

    from changgengLoader import ECGLoader
    ecg = ECGLoader(500, current_folderpath)

    random_forest_config = dict(max_depth=10)
    walker = RandomWalker(target_label=target_label,
                          random_forest_config=random_forest_config,
                          random_pattern_file_name=os.path.join(
                              os.path.dirname(model_file_name),
                              'random_pattern.json'))

    start_time = time.time()

    for record_ind in xrange(0, len(annot_jsonIDs)):
        record_name = annot_jsonIDs[record_ind]
        CP_file_name = os.path.join(current_folderpath, 'data', 'labels',
                                    target_label, '%s.json' % record_name)

        CP_marks = []

        # Add manual labels if possible
        if os.path.exists(CP_file_name) == True:
            with open(CP_file_name, 'r') as fin:
                CP_info = json.load(fin)
                poslist = CP_info['poslist']
                poslist = [int(x / 2) for x in poslist]
                mat_file_name = CP_info['mat_file_name']
                if len(poslist) == 0:
                    continue
                CP_marks.extend(zip(poslist, [
                    target_label,
                ] * len(poslist)))

        print 'Collecting features from record %s.' % record_name
        sig = ecg.load(record_name)
        raw_sig = sig[0]

        import scipy.signal
        resampled_sig = scipy.signal.resample_poly(raw_sig, 1, 2)
        raw_sig = resampled_sig
        # debug
        # plt.figure(1)
        # plt.plot(raw_sig, label = 'signal')
        # plt.plot(xrange(0, len(raw_sig), 2), resampled_sig, label = 'resmaple')
        # plt.legend()
        # plt.grid(True)
        # plt.title(record_name)
        # plt.show()

        walker.collect_training_data(raw_sig, CP_marks)

    # Add QT training samples
    # ContinueAddQtTrainingSamples(walker, target_label)

    print 'random forest start training(%s)...' % target_label
    walker.training()
    print 'trianing used %.3f seconds' % (time.time() - start_time)

    import joblib
    start_time = time.time()
    walker.save_model(model_file_name)
    print 'Serializing model time cost %f' % (time.time() - start_time)
def RoundTest(target_label, result_folder, num_training = 75):
    '''Randomly select num_training records to train, and test others.'''
    qt = QTloader()
    record_list = qt.getreclist()
    must_train_list = [
        "sel35", 
        "sel36", 
        "sel31", 
        "sel38", 
        "sel39", 
        "sel820", 
        "sel51", 
        "sele0104", 
        "sele0107", 
        "sel223", 
        "sele0607", 
        "sel102", 
        "sele0409", 
        "sel41", 
        "sel40", 
        "sel43", 
        "sel42", 
        "sel45", 
        "sel48", 
        "sele0133", 
        "sele0116", 
        "sel14172", 
        "sele0111", 
        "sel213", 
        "sel14157", 
        "sel301"
            ]
    num_training -= len(must_train_list)
    record_list = list(set(record_list) - set(must_train_list))
    training_list = must_train_list
    if num_training > 0:
        training_list.extend(random.sample(record_list, num_training))
    testing_list = list(set(record_list) - set(training_list))

    random_forest_config = dict(
            max_depth = 10)
    walker = RandomWalker(target_label = target_label,
            random_forest_config = random_forest_config)

    start_time = time.time()
    for record_name in training_list:
        print 'Collecting features from record %s.' % record_name
        sig = qt.load(record_name)
        walker.collect_training_data(sig['sig'], qt.getExpert(record_name))
    print 'random forest start training...'
    walker.training()
    print 'trianing used %.3f seconds' % (time.time() - start_time)

    for record_name in testing_list:
        print 'testing record %s...' % record_name
        record_result = list()
        sig = qt.load(record_name)
        raw_sig = sig['sig']
        record_result.append((record_name, testing(walker, raw_sig)))
        raw_sig = sig['sig2']
        record_result.append((record_name + '_sig2', testing(walker, raw_sig)))
        # Write to json
        with open(os.path.join(result_folder, '%s.json' % record_name), 'w') as fout:
            json.dump(record_result, fout, indent = 4)